package edu.stanford.nlp.dcoref;

import com.ibm.icu.text.PluralRules;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.time.SUTime;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.ModCollinsHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.util.AbstractIterator;
import edu.stanford.nlp.util.CollectionFactory;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
import java.util.Stack;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.eclipse.jgit.transport.WalkEncryption;
import org.springframework.beans.factory.support.PropertiesBeanDefinitionReader;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader.class */
public class CoNLL2011DocumentReader {
    // Column indices into the whitespace-separated fields of one CoNLL token line.
    // NOTE(review): -1 is handed to getField (defined outside this view); presumably
    // it means "the last column" — confirm against getField.
    private static final int FIELD_LAST = -1;
    // Document identifier column.
    private static final int FIELD_DOC_ID = 0;
    // Document part number column.
    private static final int FIELD_PART_NO = 1;
    // Index of the word within its sentence.
    private static final int FIELD_WORD_NO = 2;
    // Surface form of the word.
    private static final int FIELD_WORD = 3;
    // Part-of-speech tag.
    private static final int FIELD_POS_TAG = 4;
    // Bracketed parse fragment in which '*' stands for the word itself.
    private static final int FIELD_PARSE_BIT = 5;
    // Speaker/author of the token.
    private static final int FIELD_SPEAKER_AUTHOR = 9;
    // Named-entity span markup.
    private static final int FIELD_NER_TAG = 10;
    // Coreference span markup (same sentinel value as FIELD_LAST).
    private static final int FIELD_COREF = -1;
    // Minimum number of columns a token line must have.
    private static final int FIELDS_MIN = 12;
    // Iterator over the documents of the file currently being read.
    private DocumentIterator docIterator;
    // Files to read.
    protected final List<File> fileList;
    // NOTE(review): presumably the position in fileList of the file being read — confirm.
    private int curFileIndex;
    // Reader configuration shared with the document iterators.
    private final Options options;
    // Redwood logging channels for this class.
    private static Redwood.RedwoodChannels log = Redwood.channels(CoNLL2011DocumentReader.class);
    // java.util.logging logger used by the nested classes for diagnostics.
    public static final Logger logger = Logger.getLogger(CoNLL2011DocumentReader.class.getName());

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$CorefMentionAnnotation.class */
    /**
     * Annotation key whose value is a {@link CoreMap}. In this file it is set on the
     * label of a parse-tree node to attach the coreference mention chunk that the
     * node covers.
     */
    public static class CorefMentionAnnotation implements CoreAnnotation<CoreMap> {
        /** Returns the value type stored under this key, {@code CoreMap.class}. */
        @Override // edu.stanford.nlp.ling.CoreAnnotation
        public Class<CoreMap> getType() {
            return CoreMap.class;
        }
    }

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$CorpusStats.class */
    /**
     * Accumulates statistics about how coreference mentions line up with parse-tree
     * nodes and named-entity annotations, over every document passed to
     * {@link #process(Document)}. {@link #toString()} renders the totals as a
     * tab-separated report.
     */
    public static class CorpusStats {
        /** Label of each mention's own tree node (preterminals included). */
        IntCounter<String> mentionTreeLabelCounter = new IntCounter<>();
        /** Label of each mention's tree node, replaced by its parent when the node is a preterminal. */
        IntCounter<String> mentionTreeNonPretermLabelCounter = new IntCounter<>();
        /** Label of the mention node for mentions whose non-preterminal node does not span exactly the mention. */
        IntCounter<String> mentionTreePretermNonPretermNoMatchLabelCounter = new IntCounter<>();
        /** Label of the non-preterminal node when it spans exactly the mention, otherwise of the mention node. */
        IntCounter<String> mentionTreeMixedLabelCounter = new IntCounter<>();
        /** Mention lengths in tokens. */
        IntCounter<Integer> mentionTokenLengthCounter = new IntCounter<>();
        /** Lengths in tokens of mentions whose tree node carries a named-entity annotation. */
        IntCounter<Integer> nerMentionTokenLengthCounter = new IntCounter<>();
        /** Mentions whose tree node spans exactly the mention tokens. */
        int mentionExactTreeSpan = 0;
        /** Mentions whose non-preterminal tree node spans exactly the mention tokens. */
        int nonPretermSpanMatches = 0;
        /** All mentions seen. */
        int totalMentions = 0;
        /** Named-entity mentions that have an ancestor node which is also a named entity. */
        int nestedNerMentions = 0;
        /** Mentions whose tree node carries a named-entity annotation. */
        int nerMentions = 0;

        /**
         * Folds every coreference mention of {@code document} into the counters.
         *
         * @param document an annotated document; its annotation and coref chain map must be set
         */
        public void process(Document document) {
            List<CoreMap> sentences = document.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
            for (String chainId : document.corefChainMap.keySet()) {
                for (CoreMap mention : document.corefChainMap.get(chainId)) {
                    CoreMap sentence = sentences.get(mention.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    Tree root = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                    Tree mentionTree = mention.get(TreeCoreAnnotations.TreeAnnotation.class);
                    Tree nonPretermTree = mentionTree.isPreTerminal() ? mentionTree.parent(root) : mentionTree;
                    Tree mixedTree = mentionTree;

                    // Token offsets are document-wide; make them relative to the sentence
                    // so they are comparable with tree spans.
                    int sentenceTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
                    int tokenBegin = mention.get(CoreAnnotations.TokenBeginAnnotation.class) - sentenceTokenBegin;
                    int tokenEnd = mention.get(CoreAnnotations.TokenEndAnnotation.class) - sentenceTokenBegin;
                    int length = tokenEnd - tokenBegin;
                    this.mentionTokenLengthCounter.incrementCount(Integer.valueOf(length));

                    // Tree spans are inclusive at both ends, token end offsets are exclusive.
                    IntPair span = mentionTree.getSpan();
                    if (span == null) {
                        CoNLL2011DocumentReader.logger.warning("No span for " + mentionTree);
                    } else if (span.getSource() == tokenBegin && span.getTarget() == tokenEnd - 1) {
                        this.mentionExactTreeSpan++;
                    } else {
                        CoNLL2011DocumentReader.logger.info("Tree span is " + span + ", tree node is " + mentionTree);
                        CoNLL2011DocumentReader.logger.info("Mention span is " + tokenBegin + " " + (tokenEnd - 1) + ", mention is " + mention);
                    }

                    IntPair nonPretermSpan = nonPretermTree.getSpan();
                    if (nonPretermSpan.getSource() == tokenBegin && nonPretermSpan.getTarget() == tokenEnd - 1) {
                        this.nonPretermSpanMatches++;
                        mixedTree = nonPretermTree;
                    } else {
                        this.mentionTreePretermNonPretermNoMatchLabelCounter.incrementCount(mentionTree.label().value());
                        // NOTE(review): this message prints the mention node's span next to the
                        // non-preterminal node; kept as-is, confirm whether nonPretermSpan was meant.
                        CoNLL2011DocumentReader.logger.info("NPT: Tree span is " + span + ", tree node is " + nonPretermTree);
                        CoNLL2011DocumentReader.logger.info("NPT: Mention span is " + tokenBegin + " " + (tokenEnd - 1) + ", mention is " + mention);
                        logClusteredMentions(document, mentionTree.label());
                    }

                    this.totalMentions++;
                    this.mentionTreeLabelCounter.incrementCount(mentionTree.label().value());
                    this.mentionTreeNonPretermLabelCounter.incrementCount(nonPretermTree.label().value());
                    this.mentionTreeMixedLabelCounter.incrementCount(mixedTree.label().value());

                    Label mentionLabel = mentionTree.label();
                    if ((mentionLabel instanceof CoreLabel) && ((CoreLabel) mentionLabel).containsKey(NamedEntityAnnotation.class)) {
                        this.nerMentions++;
                        this.nerMentionTokenLengthCounter.incrementCount(Integer.valueOf(length));
                        // Walk up towards the root; the walk ends at the first named-entity
                        // ancestor or when there is no parent left.
                        for (Tree ancestor = mentionTree.parent(root); ancestor != null; ancestor = ancestor.parent(root)) {
                            Label ancestorLabel = ancestor.label();
                            if ((ancestorLabel instanceof CoreLabel) && ((CoreLabel) ancestorLabel).containsKey(NamedEntityAnnotation.class)) {
                                CoNLL2011DocumentReader.logger.info("NER Mention: " + mention);
                                CoNLL2011DocumentReader.logger.info("Nested inside NER Mention: " + ((CoreLabel) ancestorLabel).get(NamedEntityAnnotation.class));
                                CoNLL2011DocumentReader.logger.info("Nested inside NER Mention parent node: " + ancestor);
                                this.nestedNerMentions++;
                                break;
                            }
                        }
                    }
                }
            }
        }

        /** Logs the text of every mention in the coref cluster attached to {@code label}, if there is one. */
        private static void logClusteredMentions(Document document, Label label) {
            if (!(label instanceof CoreLabel)) {
                return;
            }
            CoreMap labelledMention = ((CoreLabel) label).get(CorefMentionAnnotation.class);
            if (labelledMention == null) {
                // The tree label carries no mention annotation, so there is no cluster to report.
                return;
            }
            String clusterId = labelledMention.get(CorefCoreAnnotations.CorefAnnotation.class);
            for (CoreMap clustered : document.corefChainMap.get(clusterId)) {
                CoNLL2011DocumentReader.logger.info("NPT: Clustered mention " + clustered.get(CoreAnnotations.TextAnnotation.class));
            }
        }

        /**
         * Appends {@code label <TAB> num/den as a decimal <TAB> (num/den)}.
         * The division is done in floating point, so a zero denominator yields
         * NaN or Infinity rather than an exception.
         */
        private static void appendFrac(StringBuilder sb, String label, int num, int den) {
            double fraction = ((double) num) / den;
            sb.append(label).append("\t").append(fraction).append("\t(").append(num).append("/").append(den).append(")");
        }

        /** Appends a header line followed by one fraction line per key of {@code counter}. */
        private static <E> void appendIntCountStats(StringBuilder sb, String label, IntCounter<E> counter) {
            sb.append(label).append("\n");
            List<E> sortedKeys = Counters.toSortedList(counter);
            int total = counter.totalIntCount();
            for (E key : sortedKeys) {
                appendFrac(sb, key.toString(), counter.getIntCount(key), total);
                sb.append("\n");
            }
        }

        /** Renders all counters and totals as a multi-line, tab-separated report. */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            appendIntCountStats(sb, "Mention Tree Labels (no preterminals)", this.mentionTreeNonPretermLabelCounter);
            sb.append("\n");
            appendIntCountStats(sb, "Mention Tree Labels (with preterminals)", this.mentionTreeLabelCounter);
            sb.append("\n");
            appendIntCountStats(sb, "Mention Tree Labels (preterminals with parent span not match)", this.mentionTreePretermNonPretermNoMatchLabelCounter);
            sb.append("\n");
            appendIntCountStats(sb, "Mention Tree Labels (mixed)", this.mentionTreeMixedLabelCounter);
            sb.append("\n");
            appendIntCountStats(sb, "Mention Lengths", this.mentionTokenLengthCounter);
            sb.append("\n");
            appendFrac(sb, "Mention Exact Non Preterm Tree Span", this.nonPretermSpanMatches, this.totalMentions);
            sb.append("\n");
            appendFrac(sb, "Mention Exact Tree Span", this.mentionExactTreeSpan, this.totalMentions);
            sb.append("\n");
            appendFrac(sb, "NER", this.nerMentions, this.totalMentions);
            sb.append("\n");
            appendFrac(sb, "Nested NER", this.nestedNerMentions, this.totalMentions);
            sb.append("\n");
            appendIntCountStats(sb, "NER Mention Lengths", this.nerMentionTokenLengthCounter);
            return sb.toString();
        }
    }

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$Document.class */
    /**
     * One document read from a CoNLL file: the raw per-token field arrays grouped by
     * sentence, plus the annotation, coreference chains and NER chunks that are
     * attached to it once it has been annotated.
     */
    public static class Document {
        /** Text that follows the begin-document marker on the document's first line. */
        String documentIdPart;
        /** Document id taken from the token lines. */
        String documentID;
        /** Part number taken from the token lines. */
        String partNo;
        /** For each sentence, the field arrays of its tokens. */
        List<List<String[]>> sentenceWordLists = new ArrayList<>();
        /** Document-level annotation. */
        Annotation annotation;
        /** Coreference mentions grouped by chain id. */
        CollectionValuedMap<String, CoreMap> corefChainMap;
        /** Named-entity chunks found in the document. */
        List<CoreMap> nerChunks;

        /** @return the document id */
        public String getDocumentID() {
            return documentID;
        }

        /** @param str the document id to store */
        public void setDocumentID(String str) {
            documentID = str;
        }

        /** @return the part number */
        public String getPartNo() {
            return partNo;
        }

        /** @param str the part number to store */
        public void setPartNo(String str) {
            partNo = str;
        }

        /** @return the live list of sentences, each a list of token field arrays */
        public List<List<String[]>> getSentenceWordLists() {
            return sentenceWordLists;
        }

        /** Appends one sentence, given as the field arrays of its tokens. */
        public void addSentence(List<String[]> list) {
            sentenceWordLists.add(list);
        }

        /** @return the document-level annotation */
        public Annotation getAnnotation() {
            return annotation;
        }

        /** @param annotation the document-level annotation to store */
        public void setAnnotation(Annotation annotation) {
            this.annotation = annotation;
        }

        /** @return coreference mentions grouped by chain id */
        public CollectionValuedMap<String, CoreMap> getCorefChainMap() {
            return corefChainMap;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$DocumentIterator.class */
    public static class DocumentIterator extends AbstractIterator<Document> implements Closeable {
        private static final Pattern delimiterPattern;
        private static final LabeledScoredTreeReaderFactory treeReaderFactory;
        private final Options options;
        String filename;
        BufferedReader br;
        private static final Pattern starPattern;
        private static final String ASTERISK = "*";
        private static final String HYPHEN = "-";
        private static final String docStart = "#begin document ";
        private static final int docStartLength;
        static final /* synthetic */ boolean $assertionsDisabled;
        int lineCnt = 0;
        int docCnt = 0;
        Document nextDoc = readNextDocument();

        public DocumentIterator(String str, Options options) throws IOException {
            this.options = options;
            this.filename = str;
            this.br = IOUtils.readerFromString(str);
        }

        @Override // edu.stanford.nlp.util.AbstractIterator, java.util.Iterator
        public boolean hasNext() {
            return this.nextDoc != null;
        }

        @Override // edu.stanford.nlp.util.AbstractIterator, java.util.Iterator
        public Document next() {
            if (this.nextDoc == null) {
                throw new NoSuchElementException("DocumentIterator exhausted.");
            }
            Document document = this.nextDoc;
            this.nextDoc = readNextDocument();
            return document;
        }

        private static Tree wordsToParse(List<String[]> list) {
            StringBuilder sb = new StringBuilder();
            for (String[] strArr : list) {
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                String replace = strArr[5].replace("NOPARSE", SUTime.PAD_FIELD_UNKNOWN);
                String str = "(" + strArr[4] + " " + strArr[3] + ")";
                int indexOf = replace.indexOf(42);
                sb.append(replace.substring(0, indexOf));
                sb.append(str);
                sb.append(replace.substring(indexOf + 1));
                if (replace.indexOf(42, indexOf + 1) >= 0) {
                    CoNLL2011DocumentReader.logger.warning(" Parse bit with multiple *: " + replace);
                }
            }
            return Tree.valueOf(sb.toString(), treeReaderFactory);
        }

        private static List<Triple<Integer, Integer, String>> getCorefSpans(List<String[]> list) {
            return getLabelledSpans(list, -1, "-", true);
        }

        private static List<Triple<Integer, Integer, String>> getNerSpans(List<String[]> list) {
            return getLabelledSpans(list, 10, "*", false);
        }

        private static List<Triple<Integer, Integer, String>> getLabelledSpans(List<String[]> list, int i, String str, boolean z) {
            ArrayList arrayList = new ArrayList();
            Stack stack = new Stack();
            boolean equals = "*".equals(str);
            for (int i2 = 0; i2 < list.size(); i2++) {
                String field = CoNLL2011DocumentReader.getField(list.get(i2), i);
                if (!str.equals(field)) {
                    int i3 = -1;
                    int i4 = -1;
                    for (int i5 = 0; i5 < field.length(); i5++) {
                        char charAt = field.charAt(i5);
                        boolean z2 = false;
                        if (charAt == '(' || charAt == ')' || charAt == '|') {
                            if (i3 >= 0) {
                                String substring = field.substring(i3 + 1, i5);
                                if (equals) {
                                    substring = starPattern.matcher(substring).replaceAll("");
                                }
                                stack.push(new Triple(Integer.valueOf(i2), -1, substring));
                                i3 = -1;
                            }
                            z2 = true;
                        }
                        if (charAt == '(') {
                            i3 = i5;
                        } else if (charAt == ')') {
                            Triple triple = (Triple) stack.pop();
                            if (z) {
                                String substring2 = field.substring(i4 + 1, i5);
                                if (!substring2.equals(triple.third())) {
                                    Stack stack2 = new Stack();
                                    while (!substring2.equals(triple.third())) {
                                        stack2.push(triple);
                                        if (stack.isEmpty()) {
                                            throw new RuntimeException("Cannot find matching labelled span for " + substring2);
                                        }
                                        triple = (Triple) stack.pop();
                                    }
                                    while (!stack2.isEmpty()) {
                                        stack.push(stack2.pop());
                                    }
                                    if (!$assertionsDisabled && !substring2.equals(triple.third())) {
                                        throw new AssertionError();
                                    }
                                }
                            }
                            triple.setSecond(Integer.valueOf(i2));
                            arrayList.add(triple);
                        }
                        if (z2) {
                            i4 = i5;
                        }
                    }
                    if (i3 >= 0) {
                        String substring3 = field.substring(i3 + 1, field.length());
                        if (equals) {
                            substring3 = starPattern.matcher(substring3).replaceAll("");
                        }
                        stack.push(new Triple(Integer.valueOf(i2), -1, substring3));
                    }
                }
            }
            if (stack.size() != 0) {
                throw new RuntimeException("Error extracting labelled spans for column " + i + PluralRules.KEYWORD_RULE_SEPARATOR + CoNLL2011DocumentReader.concatField(list, i));
            }
            return arrayList;
        }

        private CoreMap wordsToSentence(List<String[]> list) {
            Annotation annotation = new Annotation(CoNLL2011DocumentReader.concatField(list, 3));
            Tree wordsToParse = wordsToParse(list);
            annotation.set(TreeCoreAnnotations.TreeAnnotation.class, wordsToParse);
            List<Tree> leaves = wordsToParse.getLeaves();
            if (!$assertionsDisabled && leaves.size() != list.size()) {
                throw new AssertionError();
            }
            ArrayList<CoreLabel> arrayList = new ArrayList(leaves.size());
            annotation.set(CoreAnnotations.TokensAnnotation.class, arrayList);
            for (int i = 0; i < list.size(); i++) {
                String[] strArr = list.get(i);
                int parseInt = Integer.parseInt(strArr[2]);
                if (!$assertionsDisabled && parseInt != i) {
                    throw new AssertionError();
                }
                CoreLabel coreLabel = (CoreLabel) ((Tree) leaves.get(i)).label();
                arrayList.add(coreLabel);
                if (this.options.annotateTokenSpeaker) {
                    String replace = strArr[9].replace("_", " ");
                    if (!"-".equals(replace)) {
                        coreLabel.set(CoreAnnotations.SpeakerAnnotation.class, replace);
                    }
                }
            }
            if (this.options.annotateTokenPos) {
                for (Tree tree : leaves) {
                    ((CoreLabel) tree.label()).set(CoreAnnotations.PartOfSpeechAnnotation.class, tree.parent(wordsToParse).value());
                }
            }
            if (this.options.annotateTokenNer) {
                for (Triple<Integer, Integer, String> triple : getNerSpans(list)) {
                    int intValue = triple.first().intValue();
                    int intValue2 = triple.second().intValue();
                    String third = triple.third();
                    for (int i2 = intValue; i2 <= intValue2; i2++) {
                        CoreLabel coreLabel2 = (CoreLabel) ((Tree) leaves.get(i2)).label();
                        String str = (String) coreLabel2.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                        if (str != null) {
                            CoNLL2011DocumentReader.logger.warning("Replacing old named entity tag " + str + " with " + third);
                        }
                        coreLabel2.set(CoreAnnotations.NamedEntityTagAnnotation.class, third);
                    }
                }
                for (CoreLabel coreLabel3 : arrayList) {
                    if (!coreLabel3.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class)) {
                        coreLabel3.set(CoreAnnotations.NamedEntityTagAnnotation.class, this.options.backgroundNerTag);
                    }
                }
            }
            if (this.options.annotateTokenCoref) {
                for (Triple<Integer, Integer, String> triple2 : getCorefSpans(list)) {
                    int intValue3 = triple2.first().intValue();
                    int intValue4 = triple2.second().intValue();
                    String third2 = triple2.third();
                    int i3 = intValue3;
                    while (i3 <= intValue4) {
                        CoreLabel coreLabel4 = (CoreLabel) ((Tree) leaves.get(i3)).label();
                        String str2 = third2;
                        if (this.options.useCorefBIOESEncoding) {
                            str2 = (intValue3 == intValue4 ? AbstractBottomUpParser.START : i3 == intValue3 ? "B-" : i3 == intValue4 ? "E-" : "I-") + third2;
                        }
                        String str3 = (String) coreLabel4.get(CorefCoreAnnotations.CorefAnnotation.class);
                        if (str3 != null) {
                            str2 = str3 + "|" + str2;
                        }
                        coreLabel4.set(CorefCoreAnnotations.CorefAnnotation.class, str2);
                        i3++;
                    }
                }
            }
            return annotation;
        }

        public static Annotation sentencesToDocument(String str, List<CoreMap> list) {
            Annotation annotation = new Annotation((String) null);
            annotation.set(CoreAnnotations.DocIDAnnotation.class, str);
            annotation.set(CoreAnnotations.SentencesAnnotation.class, list);
            ArrayList<CoreLabel> arrayList = new ArrayList();
            int i = 0;
            int i2 = 0;
            for (CoreMap coreMap : list) {
                List list2 = (List) coreMap.get(CoreAnnotations.TokensAnnotation.class);
                arrayList.addAll(list2);
                int size = i2 + list2.size();
                coreMap.set(CoreAnnotations.TokenBeginAnnotation.class, Integer.valueOf(i2));
                coreMap.set(CoreAnnotations.TokenEndAnnotation.class, Integer.valueOf(size));
                coreMap.set(CoreAnnotations.SentenceIndexAnnotation.class, Integer.valueOf(i));
                i++;
                i2 = size;
            }
            annotation.set(CoreAnnotations.TokensAnnotation.class, arrayList);
            int i3 = 0;
            for (CoreLabel coreLabel : arrayList) {
                String str2 = (String) coreLabel.get(CoreAnnotations.TextAnnotation.class);
                coreLabel.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, Integer.valueOf(i3));
                int length = i3 + str2.length();
                coreLabel.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, Integer.valueOf(length));
                i3 = length + 1;
            }
            for (CoreMap coreMap2 : list) {
                List list3 = (List) coreMap2.get(CoreAnnotations.TokensAnnotation.class);
                coreMap2.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, ((CoreLabel) list3.get(0)).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                coreMap2.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, ((CoreLabel) list3.get(list3.size() - 1)).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            }
            return annotation;
        }

        private static Tree getLowestCommonAncestor(Tree tree, int i, int i2) {
            return Trees.getLowestCommonAncestor(Trees.getLeaf(tree, i), Trees.getLeaf(tree, i2), tree);
        }

        private static Tree getTreeNonTerminal(Tree tree, int i, int i2, boolean z) {
            Tree lowestCommonAncestor = getLowestCommonAncestor(tree, i, i2);
            if (lowestCommonAncestor.isLeaf()) {
                lowestCommonAncestor = lowestCommonAncestor.parent(tree);
            }
            if (!z && lowestCommonAncestor.isPreTerminal()) {
                lowestCommonAncestor = lowestCommonAncestor.parent(tree);
            }
            return lowestCommonAncestor;
        }

        public void annotateDocument(Document document) {
            ArrayList arrayList = new ArrayList(document.sentenceWordLists.size());
            Iterator<List<String[]>> it = document.sentenceWordLists.iterator();
            while (it.hasNext()) {
                arrayList.add(wordsToSentence(it.next()));
            }
            document.setAnnotation(sentencesToDocument(document.documentIdPart, arrayList));
            CollectionValuedMap<String, CoreMap> collectionValuedMap = new CollectionValuedMap<>((CollectionFactory<CoreMap>) CollectionFactory.arrayListFactory());
            ArrayList arrayList2 = new ArrayList();
            for (int i = 0; i < arrayList.size(); i++) {
                CoreMap coreMap = (CoreMap) arrayList.get(i);
                Tree tree = (Tree) coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
                tree.setSpans();
                List<String[]> list = document.sentenceWordLists.get(i);
                for (Triple<Integer, Integer, String> triple : getNerSpans(list)) {
                    int intValue = triple.first().intValue();
                    int intValue2 = triple.second().intValue();
                    String third = triple.third();
                    Annotation annotatedChunk = ChunkAnnotationUtils.getAnnotatedChunk(coreMap, intValue, intValue2 + 1);
                    annotatedChunk.set(CoreAnnotations.NamedEntityTagAnnotation.class, third);
                    annotatedChunk.set(CoreAnnotations.SentenceIndexAnnotation.class, coreMap.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    arrayList2.add(annotatedChunk);
                    Tree treeNonTerminal = getTreeNonTerminal(tree, intValue, intValue2, true);
                    if (treeNonTerminal.getSpan().getSource() == intValue && treeNonTerminal.getSpan().getTarget() == intValue2) {
                        annotatedChunk.set(TreeCoreAnnotations.TreeAnnotation.class, treeNonTerminal);
                        if (this.options.annotateTreeNer) {
                            Label label = treeNonTerminal.label();
                            if (label instanceof CoreLabel) {
                                ((CoreLabel) label).set(NamedEntityAnnotation.class, annotatedChunk);
                            }
                        }
                    }
                }
                for (Triple<Integer, Integer, String> triple2 : getCorefSpans(list)) {
                    int intValue3 = triple2.first().intValue();
                    int intValue4 = triple2.second().intValue();
                    String third2 = triple2.third();
                    Annotation annotatedChunk2 = ChunkAnnotationUtils.getAnnotatedChunk(coreMap, intValue3, intValue4 + 1);
                    annotatedChunk2.set(CorefCoreAnnotations.CorefAnnotation.class, third2);
                    annotatedChunk2.set(CoreAnnotations.SentenceIndexAnnotation.class, coreMap.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    collectionValuedMap.add(third2, annotatedChunk2);
                    Tree treeNonTerminal2 = getTreeNonTerminal(tree, intValue3, intValue4, true);
                    annotatedChunk2.set(TreeCoreAnnotations.TreeAnnotation.class, treeNonTerminal2);
                    if (this.options.annotateTreeCoref) {
                        Label label2 = treeNonTerminal2.label();
                        if (label2 instanceof CoreLabel) {
                            ((CoreLabel) label2).set(CorefMentionAnnotation.class, annotatedChunk2);
                        }
                    }
                }
            }
            document.corefChainMap = collectionValuedMap;
            document.nerChunks = arrayList2;
        }

        public Document readNextDocument() {
            try {
                ArrayList arrayList = new ArrayList();
                Document document = null;
                while (true) {
                    String readLine = this.br.readLine();
                    if (readLine == null) {
                        return null;
                    }
                    this.lineCnt++;
                    String trim = readLine.trim();
                    if (trim.length() != 0) {
                        if (trim.startsWith(docStart)) {
                            if (document != null) {
                                CoNLL2011DocumentReader.logger.warning("Unexpected begin document at line (\" + filename + \",\" + lineCnt + \")");
                            }
                            document = new Document();
                            document.documentIdPart = trim.substring(docStartLength);
                        } else {
                            if (trim.startsWith("#end document")) {
                                annotateDocument(document);
                                this.docCnt++;
                                return document;
                            }
                            if (!$assertionsDisabled && document == null) {
                                throw new AssertionError();
                            }
                            String[] split = delimiterPattern.split(trim);
                            if (split.length < 12) {
                                throw new RuntimeException("Unexpected number of field " + split.length + ", expected >= 12 for line (" + this.filename + "," + this.lineCnt + "): " + trim);
                            }
                            String str = split[0];
                            String str2 = split[1];
                            if (document.getDocumentID() == null) {
                                document.setDocumentID(str);
                                document.setPartNo(str2);
                            } else {
                                if (!$assertionsDisabled && !document.getDocumentID().equals(str)) {
                                    throw new AssertionError();
                                }
                                if (!$assertionsDisabled && !document.getPartNo().equals(str2)) {
                                    throw new AssertionError();
                                }
                            }
                            arrayList.add(split);
                        }
                    } else if (arrayList.size() <= 0) {
                        continue;
                    } else {
                        if (!$assertionsDisabled && document == null) {
                            throw new AssertionError();
                        }
                        document.addSentence(arrayList);
                        arrayList = new ArrayList();
                    }
                }
            } catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        }

        /**
         * Releases the line reader ({@code br}) this iterator pulls its input from.
         * Closing is delegated to {@link IOUtils#closeIgnoringExceptions}, so this method
         * declares no checked exception.
         */
        @Override // java.io.Closeable, java.lang.AutoCloseable
        public void close() {
            IOUtils.closeIgnoringExceptions(this.br);
        }

        // One-time initialisation of the constants shared by all iterator instances.
        static {
            // Mirrors the flag javac synthesises for `assert` statements.
            $assertionsDisabled = !CoNLL2011DocumentReader.class.desiredAssertionStatus();
            // Columns of a CoNLL line are separated by runs of whitespace. The literal is spelled
            // out here instead of borrowing an unrelated third-party constant with the same value.
            delimiterPattern = Pattern.compile("\\s+");
            treeReaderFactory = new LabeledScoredTreeReaderFactory((TreeNormalizer) null);
            // Matches a literal '*' character.
            starPattern = Pattern.compile("\\*");
            // Cached so the document-id suffix can be cut off a "begin document" line cheaply.
            docStartLength = docStart.length();
        }
    }

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$NamedEntityAnnotation.class */
    /**
     * Annotation key whose associated value is a {@link CoreMap}, as reported by
     * {@link #getType()}.
     */
    public static class NamedEntityAnnotation implements CoreAnnotation<CoreMap> {
        /** Returns the class of the value stored under this key: {@code CoreMap.class}. */
        @Override // edu.stanford.nlp.ling.CoreAnnotation
        public Class<CoreMap> getType() {
            return CoreMap.class;
        }
    }

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/dcoref/CoNLL2011DocumentReader$Options.class */
    /**
     * Configuration for a {@code CoNLL2011DocumentReader}: a set of annotation switches plus the
     * regular expression used to select input files.
     *
     * <p>Defaults: token-level coref, speaker, POS and NER annotation are enabled; BIOES coref
     * encoding and tree-level coref/NER annotation are disabled; the background NER tag is
     * {@code "O"}; {@code sortFiles} is {@code false}.
     */
    public static class Options {
        // Switches that start disabled.
        public boolean useCorefBIOESEncoding = false;
        // Token-level switches, all enabled by default.
        public boolean annotateTokenCoref = true;
        public boolean annotateTokenSpeaker = true;
        public boolean annotateTokenPos = true;
        public boolean annotateTokenNer = true;
        // Tree-level switches, disabled by default.
        public boolean annotateTreeCoref = false;
        public boolean annotateTreeNer = false;
        public String backgroundNerTag = "O";
        // File-name filter, kept both as source text and in compiled form.
        protected String fileFilter;
        protected Pattern filePattern;
        protected boolean sortFiles;

        /** Creates options that select files matching {@code .*_gold_conll$}. */
        public Options() {
            this(".*_gold_conll$");
        }

        /**
         * Creates options with the default switches and the given file filter.
         *
         * @param str regular expression used to select files; compiled immediately
         */
        public Options(String str) {
            this.fileFilter = str;
            this.filePattern = Pattern.compile(str);
        }

        /**
         * Replaces the file filter and recompiles the matching pattern.
         *
         * @param str regular expression used to select files
         */
        public void setFilter(String str) {
            this.fileFilter = str;
            this.filePattern = Pattern.compile(str);
        }
    }

    /**
     * Creates a reader over the given path using a default-constructed {@link Options}.
     *
     * @param str path handed to the two-argument constructor, which collects the matching files
     */
    public CoNLL2011DocumentReader(String str) {
        this(str, new Options());
    }

    /**
     * Creates a reader over every file under {@code str} whose name matches the options' file
     * pattern, positioned at the first file.
     *
     * @param str     path searched for input files
     * @param options reader configuration; its {@code filePattern} selects the files
     */
    public CoNLL2011DocumentReader(String str, Options options) {
        List<File> matchedFiles = getFiles(str, options.filePattern);
        if (options.sortFiles) {
            Collections.sort(matchedFiles);
        }
        this.fileList = matchedFiles;
        this.options = options;
        this.curFileIndex = 0;
        logger.info("Reading " + this.fileList.size() + " CoNll2011 files from " + str);
    }

    /**
     * Collects the files found by {@link IOUtils#iterFilesRecursive} for the given path and
     * pattern, and returns them in their natural ({@link File#compareTo}) order.
     *
     * @param str     path to search
     * @param pattern pattern passed through to the recursive file iteration
     * @return a new, sorted, mutable list of the files found
     */
    private static List<File> getFiles(String str, Pattern pattern) {
        List<File> matches = new ArrayList<>();
        for (File candidate : IOUtils.iterFilesRecursive(new File(str), pattern)) {
            matches.add(candidate);
        }
        Collections.sort(matches);
        return matches;
    }

    /**
     * Rewinds the reader to the first file. Any open document iterator is closed and dropped, so
     * the next call to {@link #getNextDocument()} opens a fresh one.
     */
    public void reset() {
        this.curFileIndex = 0;
        if (this.docIterator == null) {
            // Nothing is open; the index reset above is all that is needed.
            return;
        }
        this.docIterator.close();
        this.docIterator = null;
    }

    /**
     * Returns the next document, moving on to the following file whenever the current one has no
     * more documents.
     *
     * @return the next document, or {@code null} once every file has been consumed
     * @throws RuntimeIOException if an {@link IOException} is raised while opening or reading a file
     */
    public Document getNextDocument() {
        try {
            if (this.curFileIndex >= this.fileList.size()) {
                return null;
            }
            File currentFile = this.fileList.get(this.curFileIndex);
            if (this.docIterator == null) {
                // First call (or first call after reset()): open the current file lazily.
                String initialPath = currentFile.getAbsolutePath();
                this.docIterator = new DocumentIterator(initialPath, this.options);
            }
            // Skip past every file whose documents are exhausted.
            while (!this.docIterator.hasNext()) {
                logger.info("Processed " + this.docIterator.docCnt + " documents in " + currentFile.getAbsolutePath());
                this.docIterator.close();
                this.curFileIndex++;
                if (this.curFileIndex >= this.fileList.size()) {
                    return null;
                }
                currentFile = this.fileList.get(this.curFileIndex);
                String nextPath = currentFile.getAbsolutePath();
                this.docIterator = new DocumentIterator(nextPath, this.options);
            }
            Document document = this.docIterator.next();
            SieveCoreferenceSystem.logger.fine("Reading document: " + document.getDocumentID());
            return document;
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Closes the current document iterator via {@link IOUtils#closeIgnoringExceptions}.
     * The file index is left untouched; use {@link #reset()} to rewind.
     */
    public void close() {
        IOUtils.closeIgnoringExceptions(this.docIterator);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Picks one column out of a row of fields.
     *
     * @param strArr the fields of one line
     * @param i      zero-based column index, or {@code -1} to select the last column
     * @return the selected field
     * @throws ArrayIndexOutOfBoundsException if the resolved index is outside {@code strArr}
     */
    public static String getField(String[] strArr, int i) {
        if (i != -1) {
            return strArr[i];
        }
        // -1 is the sentinel for "last column", whatever the row width.
        return strArr[strArr.length - 1];
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Joins the same column of every row into one space-separated string.
     *
     * <p>A separator is only written once the buffer is non-empty, so leading empty fields do
     * not produce leading spaces.
     *
     * @param list rows of fields
     * @param i    column index as understood by {@link #getField(String[], int)}
     * @return the selected fields separated by single spaces
     */
    public static String concatField(List<String[]> list, int i) {
        StringBuilder joined = new StringBuilder();
        Iterator<String[]> rows = list.iterator();
        while (rows.hasNext()) {
            String[] row = rows.next();
            if (joined.length() != 0) {
                joined.append(' ');
            }
            joined.append(getField(row, i));
        }
        return joined.toString();
    }

    /** Logs the command-line synopsis of this class through {@code log.info}. */
    public static void usage() {
        log.info("java edu.stanford.nlp.dcoref.CoNLL2011DocumentReader [-ext <extension to match>] -i <inputpath> -o <outputfile>");
    }

    /**
     * Finds how far a mention that starts at token {@code num} extends to the right.
     *
     * <p>Tokens after {@code num} are scanned in order; the mention keeps growing while each
     * token's coref annotation (a {@code |}-separated list) contains {@code str}, and stops at
     * the first token that has no annotation or does not list {@code str}.
     *
     * @param num  index of the first token of the mention
     * @param str  coref label to follow
     * @param list tokens of the sentence
     * @return the pair (start index, index of the last token still carrying the label)
     */
    public static Pair<Integer, Integer> getMention(Integer num, String str, List<CoreLabel> list) {
        int position = -1;
        Integer lastIndex = num;
        for (CoreLabel token : list) {
            position++;
            if (position <= num.intValue()) {
                // Still at or before the start token; nothing to extend yet.
                continue;
            }
            String corefLabels = (String) token.get(CorefCoreAnnotations.CorefAnnotation.class);
            if (corefLabels == null) {
                break;
            }
            List<String> labels = Arrays.asList(corefLabels.split("\\|"));
            if (!labels.contains(str)) {
                break;
            }
            lastIndex = Integer.valueOf(position);
        }
        return Pair.makePair(num, lastIndex);
    }

    /**
     * Tells whether an already recorded mention with the same label covers the candidate span
     * from an earlier start up to the same end.
     *
     * @param map  recorded mention spans mapped to their labels (null labels are ignored)
     * @param pair candidate span (first = start, second = end)
     * @param str  label of the candidate
     * @return {@code true} if some entry has label {@code str}, starts strictly before the
     *         candidate and ends on the same index
     */
    public static boolean include(Map<Pair<Integer, Integer>, String> map, Pair<Integer, Integer> pair, String str) {
        for (Map.Entry<Pair<Integer, Integer>, String> recorded : map.entrySet()) {
            String label = recorded.getValue();
            if (label == null || !label.equals(str)) {
                continue;
            }
            Pair<Integer, Integer> span = recorded.getKey();
            boolean startsEarlier = span.first.intValue() < pair.first.intValue();
            if (startsEarlier && span.second.equals(pair.second)) {
                return true;
            }
        }
        return false;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Writes one sentence as tab-separated rows, one token per line, followed by an empty line.
     *
     * <p>Each row holds four columns: the word, its POS tag, its NER tag, and either
     * {@code MENTION} or {@code O}. A token is marked {@code MENTION} when its word equals the
     * head word of one of the coref mentions recorded so far in the sentence. If the token
     * marked this way is {@code 's}, the mark is moved to the preceding token.
     *
     * @param printWriter         destination for the rows
     * @param coreMap             sentence carrying token and parse-tree annotations
     * @param collectionValuedMap coref chains of the document; not used by this method
     */
    public static void writeTabSep(PrintWriter printWriter, CoreMap coreMap, CollectionValuedMap<String, CoreMap> collectionValuedMap) {
        ModCollinsHeadFinder headFinder = new ModCollinsHeadFinder();
        List<CoreLabel> tokens = coreMap.get(CoreAnnotations.TokensAnnotation.class);
        Tree sentenceTree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);

        // Index every subtree by its (start, end) token span so that a mention span can be
        // mapped to the constituent that covers exactly those tokens.
        Set<Tree> subTrees = sentenceTree.subTrees();
        sentenceTree.setSpans();
        Map<Pair<Integer, Integer>, Tree> treeBySpan = Generics.newHashMap();
        for (Tree subTree : subTrees) {
            IntPair span = subTree.getSpan();
            if (span != null) {
                treeBySpan.put(Pair.makePair(span.getSource(), span.getTarget()), subTree);
            }
        }

        // rows[i] = { word, tag, ner, mention marker } for token i.
        String[][] rows = new String[tokens.size()][];
        Map<Pair<Integer, Integer>, String> labelBySpan = Generics.newHashMap();
        Map<Pair<Integer, Integer>, String> headBySpan = Generics.newHashMap();
        int index = -1;
        for (CoreLabel token : tokens) {
            index++;
            String word = token.word();
            String coref = token.get(CorefCoreAnnotations.CorefAnnotation.class);
            rows[index] = new String[] {word, token.tag(), token.ner(), "O"};
            if (coref == null) {
                // Recorded with a null label; include() skips such entries.
                labelBySpan.put(Pair.makePair(index, index), null);
                continue;
            }
            for (String label : coref.split("\\|")) {
                Pair<Integer, Integer> mention = getMention(index, label, tokens);
                if (include(labelBySpan, mention, label)) {
                    // Already covered by a longer mention with the same label.
                    continue;
                }
                labelBySpan.put(mention, label);
                Tree mentionTree = treeBySpan.get(mention);
                String head = null;
                if (mentionTree != null) {
                    head = mentionTree.headTerminal(headFinder).nodeString();
                } else if (mention.first.equals(mention.second)) {
                    // Single-token mention without a matching constituent: the token is the head.
                    head = word;
                }
                headBySpan.put(mention, head);
            }
            if (headBySpan.values().contains(word)) {
                rows[index][3] = "MENTION";
            }
        }

        for (int i = 0; i < rows.length; i++) {
            String[] row = rows[i];
            if (i < rows.length - 1) {
                String[] next = rows[i + 1];
                // Constant-first equals keeps a null word from raising a NullPointerException.
                if ("MENTION".equals(next[3]) && "'s".equals(next[0])) {
                    row[3] = "MENTION";
                    next[3] = "O";
                }
            }
            printWriter.println(row[0] + "\t" + row[1] + "\t" + row[2] + "\t" + row[3]);
        }
        printWriter.println("");
    }

    /**
     * Command-line entry point: reads every matching CoNLL file under the input path, writes
     * each sentence to the output file with {@link #writeTabSep}, and prints document, sentence
     * and token totals plus the corpus statistics to standard output.
     *
     * <p>Recognised arguments: {@code -i <inputpath>} and {@code -o <outputfile>} (both
     * required; the usage text is logged and the JVM exits with status -1 otherwise),
     * {@code -ext <extension>} to select files by a different name suffix, and
     * {@code -debug true} for verbose per-document output.
     *
     * @param strArr command-line arguments
     * @throws IOException if the output file cannot be opened
     */
    public static void main(String[] strArr) throws IOException {
        Properties props = StringUtils.argsToProperties(strArr);
        boolean debug = Boolean.parseBoolean(props.getProperty("debug", "false"));
        String inputPath = props.getProperty("i");
        String outputPath = props.getProperty("o");
        if (inputPath == null || outputPath == null) {
            usage();
            System.exit(-1);
        }
        CorpusStats corpusStats = new CorpusStats();
        int docCount = 0;
        int sentenceCount = 0;
        int tokenCount = 0;
        // try-with-resources: the output file is flushed and closed even if reading fails.
        try (PrintWriter out = new PrintWriter(outputPath)) {
            logger.info("Writing to " + outputPath);
            String ext = props.getProperty("ext");
            // The extension is anchored to the end of the file name.
            Options options = ext != null ? new Options(".*" + ext + "$") : new Options();
            options.annotateTreeCoref = true;
            options.annotateTreeNer = true;
            CoNLL2011DocumentReader reader = new CoNLL2011DocumentReader(inputPath, options);
            try {
                Document document;
                while ((document = reader.getNextDocument()) != null) {
                    corpusStats.process(document);
                    docCount++;
                    Annotation annotation = document.getAnnotation();
                    if (debug) {
                        System.out.println("Document " + docCount + ": " + annotation.get(CoreAnnotations.DocIDAnnotation.class));
                    }
                    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                        if (debug) {
                            System.out.println("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                            System.out.println("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
                        }
                        writeTabSep(out, sentence, document.corefChainMap);
                        sentenceCount++;
                        tokenCount += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
                    }
                    if (debug) {
                        for (CoreMap chunk : document.nerChunks) {
                            System.out.println("NER Chunk: " + chunk);
                        }
                        for (String chainId : document.corefChainMap.keySet()) {
                            System.out.println("Coref: " + chainId + " = " + StringUtils.join(document.corefChainMap.get(chainId), ";"));
                        }
                    }
                }
            } finally {
                // Release the file handle of whichever input file is currently open.
                reader.close();
            }
        }
        System.out.println("Total document count: " + docCount);
        System.out.println("Total sentence count: " + sentenceCount);
        System.out.println("Total token count: " + tokenCount);
        System.out.println(corpusStats);
    }
}
