package de.unihd.dbs.uima.reader.eventi2014reader;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Toolbox;
import de.unihd.dbs.uima.types.heideltime.Dct;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/reader/eventi2014reader/Eventi2014Reader.class */
/**
 * UIMA collection reader that turns every readable file of a configured input directory into
 * one CAS.
 *
 * <p>Each input file is scanned line by line for three kinds of lines:
 * <ul>
 *   <li>{@code <Document doc_name="...">} &ndash; supplies the document name stored on every
 *       annotation,</li>
 *   <li>{@code <token t_id=".." sentence=".." number="..">text</token>} &ndash; supplies one
 *       token; the document text is rebuilt by joining the tokens with single spaces, except
 *       before closing punctuation and after an opening parenthesis,</li>
 *   <li>{@code <TIMEX3 ... TAG_DESCRIPTOR="DCT|DPT" ... value="..." .../>} &ndash; supplies the
 *       document creation time.</li>
 * </ul>
 * A token whose {@code number} attribute is {@code 0} starts a new sentence.
 *
 * <p>NOTE(review): judging by the class name the input is the EVENTI 2014 corpus format;
 * confirm against the corpus documentation.
 */
public class Eventi2014Reader extends CollectionReader_ImplBase {
    /** Name of the configuration parameter that holds the input directory path. */
    private static final String PARAM_INPUTDIR = "InputDirectory";

    /** Extracts the document name from a {@code <Document ...>} line. */
    private static final Pattern DOCUMENT_NAME_PATTERN =
            Pattern.compile("<Document doc_name=\"(.*?)\">");

    /** Extracts token id, sentence number, token number and token text from a token line. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile(
            "<token t_id=\"(.*?)\" sentence=\"(.*?)\" number=\"(.*?)\">(.*?)</token>");

    /** Extracts the whole tag (group 1) and its value (group 2) from a DCT/DPT TIMEX3 line. */
    private static final Pattern DCT_PATTERN = Pattern.compile(
            "(<TIMEX3 .*? TAG_DESCRIPTOR=\"D[CP]T\" .*? value=\"(.*?)\".*?/>)");

    /** Tokens that are attached to the preceding text without a separating space. */
    private static final HashSet<String> NO_SPACE_BEFORE =
            new HashSet<>(Arrays.asList(".", ",", ":", ";", "?", "!", ")"));

    /** Tokens after which the following token is attached without a separating space. */
    private static final HashSet<String> NO_SPACE_BEHIND = new HashSet<>(Arrays.asList("("));

    /** Class handed to the logger as the message origin. */
    private final Class<?> component = getClass();

    /** Number of files queued by {@link #populateFileList(String)}; used for progress reporting. */
    private int numberOfDocuments = 0;

    /** Files that still have to be read, in the order they will be processed. */
    private final Queue<File> files = new LinkedList<>();

    /**
     * Reads the input directory from the configuration and queues its readable files.
     *
     * @throws ResourceInitializationException if the parameter is missing or not a string, or
     *         if the directory is invalid or cannot be listed
     */
    @Override // org.apache.uima.collection.CollectionReader_ImplBase
    public void initialize() throws ResourceInitializationException {
        Object configured = getConfigParameterValue(PARAM_INPUTDIR);
        if (!(configured instanceof String)) {
            // Fail with a diagnosable cause instead of a bare NPE/ClassCastException.
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Configuration parameter \"" + PARAM_INPUTDIR
                            + "\" is missing or not a string."));
        }
        populateFileList(((String) configured).trim());
    }

    /**
     * Fills the given CAS with the next queued file and prints a progress dot to stderr.
     *
     * @param cas the CAS to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the JCas view cannot be obtained or no file is left
     */
    @Override // org.apache.uima.collection.CollectionReader
    public void getNext(CAS cas) throws IOException, CollectionException {
        try {
            fillJCas(cas.getJCas());
            System.err.print(".");
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    /**
     * Parses the next queued file and writes document text, token, sentence and (if present)
     * document-creation-time annotations into {@code jCas}.
     *
     * @param jCas the JCas to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the file queue is already empty
     */
    private void fillJCas(JCas jCas) throws IOException, CollectionException {
        File file = this.files.poll();
        if (file == null) {
            throw new CollectionException(new IllegalStateException(
                    "getNext() was called although no input files are left."));
        }

        StringBuilder text = new StringBuilder();
        String documentName = "";
        String dctTag = "";
        String dctValue = "";
        String previousToken = "";
        int sentenceBegin = 0;
        int lastTokenEnd = -1; // -1 means "no token seen yet"

        // NOTE(review): file2String(File) reads with the platform default encoding according
        // to the UIMA documentation; confirm whether the corpus requires an explicit charset.
        for (String line : FileUtils.file2String(file).split("\n")) {
            if (line.startsWith("<Document doc_name=")) {
                for (MatchResult match : Toolbox.findMatches(DOCUMENT_NAME_PATTERN, line)) {
                    documentName = match.group(1);
                }
            } else if (line.startsWith("<token")) {
                for (MatchResult match : Toolbox.findMatches(TOKEN_PATTERN, line)) {
                    String token = match.group(4);
                    int tokenId = Integer.parseInt(match.group(1));
                    int sentenceNumber = Integer.parseInt(match.group(2));
                    int tokenNumber = Integer.parseInt(match.group(3));

                    // A space separates tokens unless the text is still empty, the token is
                    // closing punctuation, or the previous token was an opening parenthesis.
                    boolean needsSpace = text.length() > 0
                            && !NO_SPACE_BEFORE.contains(token)
                            && !NO_SPACE_BEHIND.contains(previousToken);
                    if (needsSpace) {
                        text.append(' ');
                    }
                    int begin = text.length();
                    text.append(token);
                    int end = text.length();
                    previousToken = token;

                    if (tokenNumber == 0) {
                        // Token number 0 opens a new sentence: close the previous one, if any.
                        if (lastTokenEnd >= 0) {
                            addSentenceAnnotation(jCas, sentenceBegin, lastTokenEnd, documentName);
                        }
                        sentenceBegin = begin;
                    }
                    addTokenAnnotation(jCas, begin, end, tokenId, documentName, sentenceNumber,
                            tokenNumber);
                    lastTokenEnd = end;
                }
            } else if (line.startsWith("<TIMEX3")) {
                for (MatchResult match : Toolbox.findMatches(DCT_PATTERN, line)) {
                    dctTag = match.group(1);
                    dctValue = match.group(2);
                    System.err.println("DCT: " + dctValue);
                }
            }
        }

        // Close the last open sentence; a file without tokens has none, so nothing is added
        // (previously this produced a sentence ending at offset -1).
        if (lastTokenEnd >= 0) {
            addSentenceAnnotation(jCas, sentenceBegin, lastTokenEnd, documentName);
        }
        jCas.setDocumentText(text.toString());

        if (dctValue.equals("")) {
            return;
        }
        Dct dct = new Dct(jCas);
        dct.setBegin(0);
        dct.setEnd(text.length());
        dct.setFilename(documentName + "---" + dctTag);
        dct.setValue(dctValue);
        dct.addToIndexes();
    }

    /**
     * Creates a sentence annotation and adds it to the CAS indexes.
     *
     * @param jCas the JCas receiving the annotation
     * @param begin begin offset of the sentence in the document text
     * @param end end offset of the sentence in the document text
     * @param documentName value stored as the annotation's filename
     */
    public void addSentenceAnnotation(JCas jCas, int begin, int end, String documentName) {
        Sentence sentence = new Sentence(jCas);
        sentence.setBegin(begin);
        sentence.setEnd(end);
        sentence.setFilename(documentName);
        sentence.addToIndexes();
    }

    /**
     * Creates a token annotation and adds it to the CAS indexes. The annotation's filename is
     * set to {@code documentName---sentenceNumber---tokenNumber}.
     *
     * @param jCas the JCas receiving the annotation
     * @param begin begin offset of the token in the document text
     * @param end end offset of the token in the document text
     * @param tokenId value of the token's {@code t_id} attribute
     * @param documentName name of the document the token belongs to
     * @param sentenceNumber value of the token's {@code sentence} attribute
     * @param tokenNumber value of the token's {@code number} attribute
     */
    public void addTokenAnnotation(JCas jCas, int begin, int end, int tokenId,
            String documentName, int sentenceNumber, int tokenNumber) {
        Token token = new Token(jCas);
        token.setBegin(begin);
        token.setEnd(end);
        token.setTokenId(tokenId);
        token.setFilename(documentName + "---" + sentenceNumber + "---" + tokenNumber);
        token.addToIndexes();
    }

    /**
     * @return {@code true} while at least one queued file has not been read yet
     */
    @Override // org.apache.uima.collection.base_cpm.BaseCollectionReader
    public boolean hasNext() throws IOException, CollectionException {
        return !this.files.isEmpty();
    }

    /**
     * @return a single progress entry counting processed files against all queued files
     */
    @Override // org.apache.uima.collection.base_cpm.BaseCollectionReader
    public Progress[] getProgress() {
        int processed = this.numberOfDocuments - this.files.size();
        return new Progress[]{new ProgressImpl(processed, this.numberOfDocuments, "entities")};
    }

    /** Discards all files that are still queued. */
    @Override // org.apache.uima.collection.base_cpm.BaseCollectionReader
    public void close() throws IOException {
        this.files.clear();
    }

    /**
     * Queues every existing, regular, readable file located directly in the given directory
     * and records how many files were queued. Other entries are logged and skipped.
     *
     * @param inputDirectory path of the directory to scan
     * @throws ResourceInitializationException if the path is not an existing directory or its
     *         content cannot be listed
     */
    private void populateFileList(String inputDirectory) throws ResourceInitializationException {
        File directory = new File(inputDirectory);
        // isDirectory() is false for non-existent paths, so it covers the existence check too.
        if (!directory.isDirectory()) {
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Input directory \"" + directory.getAbsolutePath()
                            + "\" does not exist or is not a directory."));
        }

        // listFiles() returns null when the directory cannot be read.
        File[] entries = directory.listFiles();
        if (entries == null) {
            throw new ResourceInitializationException(new IOException(
                    "Unable to list the contents of \"" + directory.getAbsolutePath() + "\"."));
        }

        for (File entry : entries) {
            if (entry.exists() && entry.isFile() && entry.canRead()) {
                this.files.add(entry);
            } else {
                Logger.printDetail(this.component, "File \"" + entry.getAbsolutePath() + "\" was ignored because it either didn't exist, wasn't a file or wasn't readable.");
            }
        }
        this.numberOfDocuments = this.files.size();
    }
}
