package de.unihd.dbs.uima.reader.tempeval2reader;

import de.unihd.dbs.uima.types.heideltime.Dct;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/reader/tempeval2reader/Tempeval2Reader.class */
/**
 * UIMA collection reader for corpora stored in the tab-separated layout handled by this class.
 *
 * <p>The reader looks for two files directly inside the configured input directory:
 * <ul>
 *   <li>{@value #FILE_BASE_SEGMENTATION}: one token per line with the columns
 *       {@code filename, sentence number, token number[, token text]};</li>
 *   <li>{@value #FILE_DCT}: one document per line with the columns
 *       {@code filename, document creation time value}.</li>
 * </ul>
 * Every distinct filename in the segmentation file becomes one document. For each document the
 * reader rebuilds the text from its tokens and adds {@link Token}, {@link Sentence} and
 * {@link Dct} annotations to the CAS.
 */
public class Tempeval2Reader extends CollectionReader_ImplBase {
    private static final String COMPONENT_ID = "de.unihd.dbs.uima.reader.tempeval2reader";
    public static final String FILE_BASE_SEGMENTATION = "base-segmentation.tab";
    public static final String FILE_DCT = "dct.tab";
    public static final String PARAM_INPUTDIR = "InputDirectory";
    public static final String PARAM_CHARSET = "Charset";
    public static final String PARAM_USE_SPACES = "UseSpacesAsSeparators";

    /** Index into {@link #filenames} of the next document to be delivered. */
    private int currentIndex;
    public Integer numberOfDocuments = 0;
    /** Tokens of the current document, keyed by {@code filename_sentenceNumber_tokenNumber}. */
    public HashMap<String, Token> hmToken = new HashMap<>();
    /** Document creation time of the current document, keyed by its timex id. */
    public HashMap<String, Dct> hmDct = new HashMap<>();
    /** Distinct document names in order of first appearance in the segmentation file. */
    private List<String> filenames = new ArrayList<>();
    Boolean resettingParentheses = true;
    /** Value of the sentence-number column on the first line of the segmentation file. */
    int newTokSentNumber = 0;
    Charset charset = null;
    Boolean USE_SPACES = true;

    /**
     * Reads the configuration parameters and collects the document names.
     *
     * @throws ResourceInitializationException if the charset is unknown, the input directory is
     *         not set or is not a directory, or the segmentation file cannot be read
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        String charsetName = (String) getConfigParameterValue(PARAM_CHARSET);
        if (charsetName == null || charsetName.isEmpty()) {
            charsetName = "UTF-8";
        }
        try {
            this.charset = Charset.forName(charsetName);
        } catch (IllegalArgumentException e) {
            // Covers both illegal and unsupported charset names.
            System.err.println("[" + COMPONENT_ID + "] Charset " + charsetName + " was not available to be used.");
            throw new ResourceInitializationException(e);
        }

        // A missing parameter keeps the field default instead of failing with an NPE.
        Object useSpaces = getConfigParameterValue(PARAM_USE_SPACES);
        if (useSpaces instanceof Boolean) {
            this.USE_SPACES = (Boolean) useSpaces;
        }

        File inputDirectory = configuredInputDirectory();
        if (inputDirectory == null || !inputDirectory.isDirectory()) {
            String message = "[" + COMPONENT_ID + "] Configuration parameter '" + PARAM_INPUTDIR
                    + "' is not set or does not denote an existing directory: " + inputDirectory;
            System.err.println(message);
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }

        this.numberOfDocuments = getNumberOfDocuments(getFilesFromInputDirectory());
        System.err.println("[" + COMPONENT_ID + "] number of documents: " + this.numberOfDocuments);
    }

    /**
     * Fills the given CAS with the next document: text, tokens, sentences and creation time.
     *
     * @param cas the CAS to populate
     * @throws IOException if one of the input files cannot be read or is malformed
     * @throws CollectionException if the JCas view cannot be obtained
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        try {
            JCas jCas = cas.getJCas();
            this.hmToken.clear();
            this.hmDct.clear();
            String documentName = this.filenames.get(this.currentIndex++);
            List<File> inputFiles = getFilesFromInputDirectory();
            // The text must be set before the creation time, which spans the whole text.
            setTextSentencesTokens(documentName, inputFiles, jCas);
            setDocumentCreationTime(documentName, inputFiles, jCas);
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    /** @return {@code true} while documents remain that have not been delivered yet */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return this.currentIndex < this.numberOfDocuments.intValue();
    }

    /** @return a single progress entry counting delivered documents against the total */
    @Override
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.currentIndex, this.numberOfDocuments.intValue(), "entities")};
    }

    /** Nothing to release: every file is opened and closed within the method that reads it. */
    @Override
    public void close() throws IOException {
    }

    /**
     * Adds a {@link Dct} annotation spanning the whole document text for every line of
     * {@value #FILE_DCT} whose first column equals the given document name.
     *
     * @param str name of the document currently being read
     * @param list candidate input files; only the DCT file of the input directory is used
     * @param jCas the JCas to add the annotation to
     * @throws IOException if the file cannot be read or a matching line has no value column
     */
    public void setDocumentCreationTime(String str, List<File> list, JCas jCas) throws IOException {
        if (list == null) {
            return;
        }
        for (File file : list) {
            if (!isInputFile(file, FILE_DCT)) {
                continue;
            }
            try (BufferedReader reader = openReader(file)) {
                int lineNumber = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    lineNumber++;
                    String[] columns = line.split("(\t)+");
                    if (columns.length == 0) {
                        continue; // line consisting only of tabs
                    }
                    String filename = columns[0];
                    if (!filename.equals(str)) {
                        continue;
                    }
                    if (columns.length < 2) {
                        throw new IOException("Malformed line " + lineNumber + " in " + file
                                + ": missing document creation time for '" + filename + "'");
                    }
                    String documentText = jCas.getDocumentText();
                    Dct dct = new Dct(jCas);
                    dct.setBegin(0);
                    dct.setEnd(documentText == null ? 0 : documentText.length());
                    dct.setFilename(filename);
                    dct.setValue(columns[1]);
                    dct.setTimexId("t0");
                    dct.addToIndexes();
                    this.hmDct.put("t0", dct);
                }
            }
        }
    }

    /**
     * Rebuilds the text of one document from {@value #FILE_BASE_SEGMENTATION}, adds a
     * {@link Token} per token line and a {@link Sentence} per sentence, then sets the document
     * text on the JCas. Tokens are joined with a single space when {@code USE_SPACES} is true.
     *
     * @param str name of the document to read
     * @param list candidate input files; only the segmentation file of the input directory is used
     * @param jCas the JCas to populate
     * @throws IOException if the file cannot be read or contains a malformed line
     */
    public void setTextSentencesTokens(String str, List<File> list, JCas jCas) throws IOException {
        String documentText = "";
        String sentenceText = "";
        Integer position = 0;
        int previousSentence = -1;
        if (list != null) {
            for (File file : list) {
                if (!isInputFile(file, FILE_BASE_SEGMENTATION)) {
                    continue;
                }
                try (BufferedReader reader = openReader(file)) {
                    boolean lastSentenceClosed = false;
                    boolean documentStarted = false;
                    boolean firstLine = true;
                    String lineFilename = "";
                    int lineNumber = 0;
                    String line;
                    while ((line = reader.readLine()) != null) {
                        lineNumber++;
                        if (line.trim().isEmpty()) {
                            continue; // tolerate blank lines
                        }
                        String[] columns = line.split("\t");
                        if (columns.length < 3) {
                            throw new IOException("Malformed line " + lineNumber + " in " + file
                                    + ": expected at least 3 tab-separated columns");
                        }
                        lineFilename = columns[0];
                        int sentenceNumber;
                        int tokenNumber;
                        try {
                            sentenceNumber = Integer.parseInt(columns[1]);
                            tokenNumber = Integer.parseInt(columns[2]);
                        } catch (NumberFormatException e) {
                            throw new IOException("Malformed line " + lineNumber + " in " + file
                                    + ": sentence and token numbers must be integers", e);
                        }
                        if (firstLine) {
                            // The first data line defines the number that marks a "first" sentence/token.
                            this.newTokSentNumber = sentenceNumber;
                            firstLine = false;
                        }
                        String tokenText = columns.length >= 4 ? columns[3] : "";
                        if (this.resettingParentheses.booleanValue()) {
                            tokenText = resetParentheses(tokenText);
                        }

                        if (lineFilename.equals(str)) {
                            boolean firstOfDocument = sentenceNumber == this.newTokSentNumber
                                    && tokenNumber == this.newTokSentNumber;
                            // Primitive comparison: comparing boxed Integers with != tests identity.
                            boolean firstOfSentence = tokenNumber == this.newTokSentNumber
                                    || previousSentence != sentenceNumber;
                            if (firstOfDocument) {
                                documentStarted = true;
                                documentText = tokenText;
                                sentenceText = tokenText;
                                position = addTokenAnnotation(tokenText, lineFilename, sentenceNumber, tokenNumber, position, jCas);
                            } else if (firstOfSentence) {
                                // Close the previous sentence before starting the new one.
                                Integer sentenceEnd = addSentenceAnnotation(sentenceText, lineFilename, sentenceNumber - 1, position, jCas);
                                documentText = this.USE_SPACES.booleanValue() ? documentText + " " + tokenText : documentText + tokenText;
                                sentenceText = tokenText;
                                position = addTokenAnnotation(tokenText, lineFilename, sentenceNumber, tokenNumber, sentenceEnd, jCas);
                            } else {
                                if (this.USE_SPACES.booleanValue()) {
                                    documentText = documentText + " " + tokenText;
                                    sentenceText = sentenceText + " " + tokenText;
                                } else {
                                    documentText = documentText + tokenText;
                                    sentenceText = sentenceText + tokenText;
                                }
                                position = addTokenAnnotation(tokenText, lineFilename, sentenceNumber, tokenNumber, position, jCas);
                            }
                        } else if (documentStarted && !lastSentenceClosed) {
                            // First line after the requested document: close its last sentence once.
                            position = addSentenceAnnotation(sentenceText, str, previousSentence, position, jCas);
                            lastSentenceClosed = true;
                        }
                        previousSentence = sentenceNumber;
                    }
                    if (lineFilename.equals(str)) {
                        // The requested document ran to the end of the file.
                        position = addSentenceAnnotation(sentenceText, str, previousSentence, position, jCas);
                    }
                }
            }
        }
        jCas.setDocumentText(documentText);
    }

    /**
     * Maps bracket placeholders back to bracket characters and converts a trailing
     * vowel-plus-apostrophe into the grave-accented vowel.
     *
     * @param str the raw token text
     * @return the normalised token, or {@code str} unchanged if no rule applies
     */
    public String resetParentheses(String str) {
        if (str.equals("-LRB-")) {
            return "(";
        }
        if (str.equals("-RRB-")) {
            return ")";
        }
        if (str.equals("-LSB-")) {
            return "[";
        }
        if (str.equals("-RSB-")) {
            return "]";
        }
        if (str.equals("-LCB-")) {
            return "{";
        }
        if (str.equals("-RCB-")) {
            return "}";
        }
        // Replace the trailing pair itself; replaceFirst would hit an earlier occurrence instead.
        String stem = str.length() >= 2 ? str.substring(0, str.length() - 2) : "";
        if (str.endsWith("a'")) {
            return stem + "à";
        }
        if (str.endsWith("i'")) {
            return stem + "ì";
        }
        if (str.endsWith("e'")) {
            return stem + "è";
        }
        if (str.endsWith("u'")) {
            return stem + "ù";
        }
        if (str.endsWith("o'")) {
            return stem + "ò";
        }
        return str;
    }

    /**
     * Adds a {@link Sentence} annotation that ends at the given offset and is as long as the
     * given sentence text.
     *
     * @param str text of the sentence; only its length is used
     * @param str2 name of the document the sentence belongs to
     * @param num sentence id
     * @param num2 end offset of the sentence in the document text
     * @param jCas the JCas to add the annotation to
     * @return {@code num2}, unchanged
     */
    public Integer addSentenceAnnotation(String str, String str2, Integer num, Integer num2, JCas jCas) {
        Sentence sentence = new Sentence(jCas);
        sentence.setFilename(str2);
        sentence.setSentenceId(num.intValue());
        sentence.setBegin(num2.intValue() - str.length());
        sentence.setEnd(num2.intValue());
        sentence.addToIndexes();
        return num2;
    }

    /**
     * Adds a {@link Token} annotation and records it in {@link #hmToken}.
     *
     * @param str token text; only its length is used
     * @param str2 name of the document the token belongs to
     * @param num sentence number of the token
     * @param num2 token number within the sentence
     * @param num3 end offset of the preceding token
     * @param jCas the JCas to add the annotation to
     * @return end offset of the new token
     */
    public Integer addTokenAnnotation(String str, String str2, Integer num, Integer num2, Integer num3, JCas jCas) {
        Token token = new Token(jCas);
        boolean firstOfDocument = num.intValue() == this.newTokSentNumber && num2.intValue() == this.newTokSentNumber;
        int begin = num3.intValue();
        if (!firstOfDocument && this.USE_SPACES.booleanValue()) {
            begin++; // skip the separating space
        }
        int end = begin + str.length();
        token.setBegin(begin);
        token.setEnd(end);
        token.setTokenId(num2.intValue());
        token.setSentId(num.intValue());
        token.setFilename(str2);
        token.addToIndexes();
        this.hmToken.put(str2 + "_" + num + "_" + num2, token);
        return Integer.valueOf(end);
    }

    /**
     * Collects the distinct document names from the segmentation file into {@link #filenames}.
     *
     * @param list candidate input files
     * @return the number of distinct document names known after the scan
     * @throws ResourceInitializationException if the segmentation file cannot be read
     */
    private Integer getNumberOfDocuments(List<File> list) throws ResourceInitializationException {
        if (list != null) {
            for (File file : list) {
                if (!isInputFile(file, FILE_BASE_SEGMENTATION)) {
                    continue;
                }
                try (BufferedReader reader = openReader(file)) {
                    String previousName = null;
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (line.trim().isEmpty()) {
                            continue; // a blank line must not register a document named ""
                        }
                        String[] columns = line.split("\t");
                        if (columns.length == 0) {
                            continue;
                        }
                        String name = columns[0];
                        // Consecutive lines usually share a name, so skip the list scan for those.
                        if (!name.equals(previousName) && !this.filenames.contains(name)) {
                            this.filenames.add(name);
                        }
                        previousName = name;
                    }
                } catch (IOException e) {
                    throw new ResourceInitializationException(e);
                }
            }
        }
        return Integer.valueOf(this.filenames.size());
    }

    /**
     * Lists the regular (non-directory) entries directly inside the input directory.
     *
     * @return the files found; empty, never {@code null}, if the directory is missing or unreadable
     */
    private List<File> getFilesFromInputDirectory() {
        List<File> files = new ArrayList<>();
        File directory = configuredInputDirectory();
        if (directory == null || !directory.isDirectory()) {
            warn("getFilesFromInputDirectory() " + directory + " does not exist. Client has to set configuration parameter 'InputDirectory'.");
            return files;
        }
        File[] entries = directory.listFiles();
        if (entries == null) {
            // listFiles() returns null when an I/O error occurs.
            warn("getFilesFromInputDirectory() " + directory + " could not be listed.");
            return files;
        }
        for (File entry : entries) {
            if (!entry.isDirectory()) {
                files.add(entry);
            }
        }
        return files;
    }

    /** Returns the trimmed input directory parameter as a {@link File}, or null if it is not set. */
    private File configuredInputDirectory() {
        Object configured = getConfigParameterValue(PARAM_INPUTDIR);
        if (!(configured instanceof String)) {
            return null;
        }
        return new File(((String) configured).trim());
    }

    /** Tells whether the candidate is the file with the given name inside the input directory. */
    private boolean isInputFile(File candidate, String fileName) {
        File directory = configuredInputDirectory();
        if (candidate == null || directory == null) {
            return false;
        }
        // Comparing File objects avoids mismatches from relative paths or trailing separators.
        return new File(directory, fileName).getAbsoluteFile().equals(candidate.getAbsoluteFile());
    }

    /** Opens the file with the configured charset, using UTF-8 if none has been configured. */
    private BufferedReader openReader(File file) throws IOException {
        Charset effectiveCharset = this.charset != null ? this.charset : Charset.forName("UTF-8");
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), effectiveCharset));
    }

    /** Logs a warning through the resource logger, or to standard error if none is available. */
    private void warn(String message) {
        Logger log = getLogger();
        if (log != null) {
            log.log(Level.WARNING, message);
        } else {
            System.err.println("[" + COMPONENT_ID + "] " + message);
        }
    }
}
