package de.unihd.dbs.uima.annotator.treetagger;

import de.unihd.dbs.uima.annotator.heideltime.resources.Language;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.annotator.treetagger.TreeTaggerTokenizer;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.impl.RootUimaContext_impl;
import org.apache.uima.jcas.JCas;
import org.apache.uima.pear.tools.InstallationController;
import org.apache.uima.resource.ConfigurationManager;
import org.apache.uima.resource.impl.ConfigurationManager_impl;
import org.apache.uima.resource.impl.ResourceManager_impl;
import org.eclipse.jgit.transport.WalkEncryption;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/annotator/treetagger/TreeTaggerWrapper.class */
public class TreeTaggerWrapper extends JCasAnnotator_ImplBase {
    // Names of the configuration parameters this annotator is configured with.
    /** Configuration parameter: name of the document language. */
    public static final String PARAM_LANGUAGE = "language";
    /** Configuration parameter: whether {@code process} should create Token annotations. */
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    /** Configuration parameter: whether sentence annotations should be produced (passed on to the tagger reader). */
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    /** Configuration parameter: whether {@code process} should run part-of-speech tagging. */
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";
    /** Configuration parameter name; NOTE(review): no read of this parameter is visible in this class. */
    public static final String PARAM_IMPROVE_GERMAN_SENTENCES = "improvegermansentences";
    /** Configuration parameter: optional directory of the Chinese tokenizer. */
    public static final String PARAM_CHINESE_TOKENIZER_PATH = "ChineseTokenizerPath";
    // Language resolved from the configuration in initialize(UimaContext).
    private Language language;
    // Writer/reader runnables created per document by doTreeTag.
    private TreeTaggerWriter ttwriter;
    private TreeTaggerReader ttreader;
    // Runtime class of this annotator, passed to Logger as the message origin.
    private Class<?> component = getClass();
    // Feature switches; overwritten from the configuration in initialize(UimaContext).
    private Boolean annotate_tokens = false;
    private Boolean annotate_sentences = false;
    private Boolean annotate_partofspeech = false;
    // TreeTagger installation settings (root path, file names, language switch).
    private TreeTaggerProperties ttprops = new TreeTaggerProperties();
    // Tagger process wrapper, created lazily by doTreeTag and released by quit().
    private TreeTaggerProcess ttProc = null;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/annotator/treetagger/TreeTaggerWrapper$TreeTaggerContext.class */
    /**
     * Root UIMA context that carries the wrapper's settings in its own
     * {@link ConfigurationManager}, so the annotator can be initialized
     * programmatically without a descriptor.
     */
    public class TreeTaggerContext extends RootUimaContext_impl {
        /** Configuration manager backing this context; returned by {@link #getConfigurationManager()}. */
        private ConfigurationManager mConfigManager;

        /**
         * Creates a context without a Chinese tokenizer path.
         *
         * @param treeTaggerWrapper not used by this constructor
         * @param language          language whose name is stored as the "language" parameter
         * @param bool              value for "annotate_tokens"
         * @param bool2             value for "annotate_sentences"
         * @param bool3             value for "annotate_partofspeech"
         * @param bool4             accepted but not stored in the configuration
         */
        public TreeTaggerContext(TreeTaggerWrapper treeTaggerWrapper, Language language, Boolean bool, Boolean bool2, Boolean bool3, Boolean bool4) {
            this(language, bool, bool2, bool3, bool4, null);
        }

        /**
         * Creates a context and stores all settings under their qualified parameter names.
         *
         * @param language language whose name is stored as the "language" parameter
         * @param bool     value for "annotate_tokens"
         * @param bool2    value for "annotate_sentences"
         * @param bool3    value for "annotate_partofspeech"
         * @param bool4    accepted but not stored in the configuration
         * @param str      value for "ChineseTokenizerPath"; may be {@code null}
         */
        public TreeTaggerContext(Language language, Boolean bool, Boolean bool2, Boolean bool3, Boolean bool4, String str) {
            final ConfigurationManager configManager = new ConfigurationManager_impl();
            this.mConfigManager = configManager;
            initializeRoot(null, new ResourceManager_impl(), configManager);
            configManager.setSession(getSession());

            // The PARAM_* constants hold exactly the parameter names used by initialize(UimaContext).
            configManager.setConfigParameterValue(makeQualifiedName(TreeTaggerWrapper.PARAM_LANGUAGE), language.getName());
            configManager.setConfigParameterValue(makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_TOKENS), bool);
            configManager.setConfigParameterValue(makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_PARTOFSPEECH), bool3);
            configManager.setConfigParameterValue(makeQualifiedName(TreeTaggerWrapper.PARAM_ANNOTATE_SENTENCES), bool2);
            configManager.setConfigParameterValue(makeQualifiedName(TreeTaggerWrapper.PARAM_CHINESE_TOKENIZER_PATH), str);
        }

        /** Returns the configuration manager populated by the constructor. */
        @Override // org.apache.uima.impl.RootUimaContext_impl, org.apache.uima.UimaContextAdmin
        public ConfigurationManager getConfigurationManager() {
            return this.mConfigManager;
        }
    }

    /**
     * Initializes the wrapper programmatically without a Chinese tokenizer path.
     * Delegates to the seven-argument overload with a {@code null} path.
     *
     * @param language document language
     * @param str      TreeTagger root directory
     * @param bool     whether to annotate tokens
     * @param bool2    whether to annotate sentences
     * @param bool3    whether to annotate part of speech
     * @param bool4    forwarded unchanged to the seven-argument overload
     */
    public void initialize(Language language, String str, Boolean bool, Boolean bool2, Boolean bool3, Boolean bool4) {
        final String noChineseTokenizerPath = null;
        initialize(language, str, bool, bool2, bool3, bool4, noChineseTokenizerPath);
    }

    /**
     * Initializes the wrapper programmatically: stores the TreeTagger root
     * directory, wraps the remaining settings in a {@link TreeTaggerContext}
     * and hands that to {@link #initialize(UimaContext)}.
     *
     * @param language document language
     * @param str      TreeTagger root directory
     * @param bool     whether to annotate tokens
     * @param bool2    whether to annotate sentences
     * @param bool3    whether to annotate part of speech
     * @param bool4    passed to the context constructor
     * @param str2     Chinese tokenizer directory; may be {@code null}
     */
    public void initialize(Language language, String str, Boolean bool, Boolean bool2, Boolean bool3, Boolean bool4, String str2) {
        // The root path must be set before the context-based initialization reads it.
        setHome(str);
        final TreeTaggerContext context = new TreeTaggerContext(language, bool, bool2, bool3, bool4, str2);
        initialize(context);
    }

    /**
     * Reads the annotator configuration from the given context, derives the
     * TreeTagger file names for the configured language and verifies that the
     * required files exist below the TreeTagger root directory.
     *
     * <p>The root directory is the value set via {@link #setHome(String)} or,
     * if none was set, the {@code TREETAGGER_HOME} environment variable. If no
     * root directory is available, or required files are missing, an error is
     * logged and the JVM is terminated with exit code -1.
     *
     * <p>Boolean parameters that are absent from the context are treated as
     * {@code false} instead of being stored as {@code null}.
     *
     * @param uimaContext context providing the configuration parameters
     */
    @Override // org.apache.uima.analysis_component.AnalysisComponent_ImplBase, org.apache.uima.analysis_component.AnalysisComponent
    public void initialize(UimaContext uimaContext) {
        this.language = Language.getLanguageFromString((String) uimaContext.getConfigParameterValue("language"));
        this.annotate_tokens = readFlag(uimaContext, "annotate_tokens");
        this.annotate_sentences = readFlag(uimaContext, "annotate_sentences");
        this.annotate_partofspeech = readFlag(uimaContext, "annotate_partofspeech");
        String chineseTokenizerPath = (String) uimaContext.getConfigParameterValue("ChineseTokenizerPath");

        this.ttprops.languageName = this.language.getTreeTaggerLangName();
        if (this.ttprops.rootPath == null) {
            this.ttprops.rootPath = System.getenv("TREETAGGER_HOME");
        }
        // Check for a missing root directory before any path is derived from it,
        // so that no "null/..." paths are built and probed.
        if (this.ttprops.rootPath == null) {
            Logger.printError("TreeTagger environment variable is not present, aborting.");
            System.exit(-1);
        }

        this.ttprops.tokScriptName = "utf8-tokenize.perl";
        final String libDir = this.ttprops.rootPath + this.ttprops.fileSeparator + InstallationController.PACKAGE_LIB_DIR;
        final String cmdDir = this.ttprops.rootPath + this.ttprops.fileSeparator + "cmd";

        // Prefer the UTF-8 variants of the parameter and abbreviation files when present.
        final String utf8ParName = this.ttprops.languageName + "-utf8.par";
        if (new File(libDir, utf8ParName).exists()) {
            this.ttprops.parFileName = utf8ParName;
        } else {
            this.ttprops.parFileName = this.ttprops.languageName + ".par";
        }
        final String utf8AbbName = this.ttprops.languageName + "-abbreviations-utf8";
        if (new File(libDir, utf8AbbName).exists()) {
            this.ttprops.abbFileName = utf8AbbName;
        } else {
            this.ttprops.abbFileName = this.ttprops.languageName + "-abbreviations";
        }

        this.ttprops.languageSwitch = this.language.getTreeTaggerSwitch();
        if (chineseTokenizerPath == null || chineseTokenizerPath.equals("")) {
            this.ttprops.chineseTokenizerPath = new File(this.ttprops.rootPath, "cmd");
        } else {
            this.ttprops.chineseTokenizerPath = new File(chineseTokenizerPath);
        }

        final File abbFile = new File(libDir, this.ttprops.abbFileName);
        final File parFile = new File(libDir, this.ttprops.parFileName);
        final File tokScript = new File(cmdDir, this.ttprops.tokScriptName);

        boolean abbreviationsOk = abbFile.exists();
        if (!abbreviationsOk) {
            if (this.language.equals(Language.CHINESE) || this.language.equals(Language.RUSSIAN)) {
                // These languages may run without an abbreviation file.
                abbreviationsOk = true;
                this.ttprops.abbFileName = null;
            } else {
                Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.abbFileName);
            }
        }

        final boolean parameterFileOk = parFile.exists();
        if (!parameterFileOk) {
            Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.parFileName);
        }

        boolean tokenizerScriptOk = tokScript.exists();
        if (!tokenizerScriptOk) {
            if (this.language.equals(Language.CHINESE)) {
                // Chinese is tokenized by a separate tokenizer, so the perl script is optional.
                tokenizerScriptOk = true;
            } else {
                Logger.printError(this.component, "File missing to use TreeTagger tokenizer: " + this.ttprops.tokScriptName);
            }
        }

        if (abbreviationsOk && parameterFileOk && tokenizerScriptOk) {
            return;
        }
        Logger.printError(this.component, "Cannot find tree tagger (" + this.ttprops.rootPath + this.ttprops.fileSeparator + "cmd" + this.ttprops.fileSeparator + this.ttprops.tokScriptName + "). Make sure that path to tree tagger is set correctly in config.props!");
        Logger.printError(this.component, "If path is set correctly:");
        Logger.printError(this.component, "Maybe you need to download the TreeTagger tagger-scripts.tar.gz");
        Logger.printError(this.component, "from http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz");
        Logger.printError(this.component, "Extract this file and copy the missing file into the corresponding TreeTagger directories.");
        Logger.printError(this.component, "If missing, copy " + this.ttprops.abbFileName + " into " + libDir);
        Logger.printError(this.component, "If missing, copy " + this.ttprops.parFileName + " into " + libDir);
        Logger.printError(this.component, "If missing, copy " + this.ttprops.tokScriptName + " into " + cmdDir);
        System.exit(-1);
    }

    /** Reads a Boolean configuration parameter, mapping an absent value to {@code false}. */
    private static Boolean readFlag(UimaContext context, String parameterName) {
        Boolean value = (Boolean) context.getConfigParameterValue(parameterName);
        return value != null ? value : Boolean.FALSE;
    }

    /**
     * Runs the configured steps on one document: tokenization (with a
     * dedicated tokenizer for Chinese), part-of-speech tagging, and the
     * language-specific sentence corrections for German and French.
     *
     * @param jCas document to annotate
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override // org.apache.uima.analysis_component.JCasAnnotator_ImplBase
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (this.annotate_tokens) {
            if (!this.language.equals(Language.CHINESE)) {
                tokenize(jCas);
            } else {
                tokenizeChinese(jCas);
            }
        }

        if (this.annotate_partofspeech) {
            doTreeTag(jCas);
        }

        // Sentence post-processing runs independently of the switches above.
        if (Language.GERMAN == this.language) {
            improveGermanSentences(jCas);
        }
        if (Language.FRENCH == this.language) {
            improveFrenchSentences(jCas);
        }
    }

    /**
     * Tokenizes the document text with {@link TreeTaggerTokenizer} and adds a
     * {@link Token} annotation for every token that can be located in the
     * original text.
     *
     * <p>Double newlines are replaced by an {@code EMPTYLINE} marker before
     * tokenization. Each marker becomes a zero-length token with part of speech
     * {@code "EMPTYLINE"}, which is only indexed when part-of-speech annotation
     * is enabled.
     *
     * <p>The abbreviation file configured in {@code ttprops.abbFileName} is
     * used as is. It is no longer overwritten with a hard-coded
     * {@code "english-abbreviations"}, which ignored the language-specific
     * file, made the no-abbreviations case unreachable and modified the
     * shared properties on every call.
     *
     * @param jCas document to tokenize
     */
    private void tokenize(JCas jCas) {
        Logger.printDetail(this.component, "TreeTagger (tokenization) with: " + this.ttprops.abbFileName);
        EnumSet<TreeTaggerTokenizer.Flag> flags = TreeTaggerTokenizer.Flag.getSet(this.ttprops.languageSwitch);

        final TreeTaggerTokenizer tokenizer;
        if (this.ttprops.abbFileName != null) {
            tokenizer = new TreeTaggerTokenizer(this.ttprops.rootPath + this.ttprops.fileSeparator + InstallationController.PACKAGE_LIB_DIR + this.ttprops.fileSeparator + this.ttprops.abbFileName, flags);
        } else {
            // No abbreviation file is configured for this language.
            tokenizer = new TreeTaggerTokenizer(null, flags);
        }

        final String documentText = jCas.getDocumentText();
        // Offset in the document text behind the last token that was located.
        int offset = 0;
        for (String tokenText : tokenizer.tokenize(documentText.replaceAll("\n\n", "\nEMPTYLINE\n"))) {
            if (tokenText.equals("EMPTYLINE")) {
                Token marker = new Token(jCas);
                marker.setBegin(offset);
                marker.setEnd(offset);
                marker.setPos("EMPTYLINE");
                if (this.annotate_partofspeech.booleanValue()) {
                    marker.addToIndexes();
                }
                continue;
            }

            final int begin = documentText.indexOf(tokenText, offset);
            if (begin < 0) {
                Logger.printError(this.component, "Tokenization was interrupted because the token \"" + tokenText + "\" could not be found in the original text. The reason for this might be that the encoding of the document is not UTF-8. This token was skipped and if it was part of a temporal expression, will not be extracted.");
                continue;
            }

            Token token = new Token(jCas);
            token.setBegin(begin);
            token.setEnd(begin + tokenText.length());
            token.addToIndexes();
            offset = token.getEnd();
        }
    }

    /**
     * Tokenizes the document with the external Chinese tokenization process.
     *
     * <p>The document is fed to the process line by line. The whitespace
     * separated pieces of every output line are located in the document text,
     * starting behind the previously located token, and added as
     * {@link Token} annotations.
     *
     * <p>Failures are reported and swallowed, as tokenization here is best
     * effort. The process streams are always closed and the process is always
     * destroyed, including on failure.
     *
     * @param jCas document to tokenize
     */
    private void tokenizeChinese(JCas jCas) {
        Process tokenizerProcess = null;
        try {
            tokenizerProcess = this.ttprops.getChineseTokenizationProcess();
            Logger.printDetail(this.component, "Chinese tokenization: " + this.ttprops.chineseTokenizerPath);
            try (BufferedReader processOutput = new BufferedReader(new InputStreamReader(tokenizerProcess.getInputStream(), "UTF-8"));
                 BufferedWriter processInput = new BufferedWriter(new OutputStreamWriter(tokenizerProcess.getOutputStream(), "UTF-8"))) {
                final String documentText = jCas.getDocumentText();
                // Offset in the document text behind the last token that was located.
                int offset = 0;
                for (String inputLine : documentText.split("[\\r\\n]+")) {
                    processInput.write(inputLine);
                    processInput.newLine();
                    processInput.flush();
                    for (String outputLine = processOutput.readLine(); outputLine != null; outputLine = processOutput.readLine()) {
                        for (String tokenText : outputLine.split(WalkEncryption.Vals.REGEX_WS)) {
                            final int begin = documentText.indexOf(tokenText, offset);
                            if (begin < 0) {
                                throw new RuntimeException("Could not find token " + tokenText + " in JCas after tokenizing with Chinese tokenization script.");
                            }
                            Token token = new Token(jCas);
                            token.setBegin(begin);
                            token.setEnd(begin + tokenText.length());
                            token.addToIndexes();
                            offset = token.getEnd();
                        }
                        // Stop reading once the process has no further output buffered for this input line.
                        if (!processOutput.ready()) {
                            break;
                        }
                    }
                }
            }
        } catch (Exception e) {
            Logger.printError(this.component, "Chinese tokenization failed: " + e);
            e.printStackTrace();
        } finally {
            if (tokenizerProcess != null) {
                tokenizerProcess.destroy();
            }
        }
    }

    /**
     * Part-of-speech tags all indexed {@link Token}s using the TreeTagger
     * process, which is started on first use and reused afterwards.
     *
     * <p>The token texts are written to the process by a
     * {@link TreeTaggerWriter} while a {@link TreeTaggerReader} consumes the
     * process output; both run on their own threads and this method waits
     * for both to finish.
     *
     * <p>If the waiting thread is interrupted, its interrupt status is
     * restored so that callers can observe the interruption.
     *
     * @param jCas document whose tokens are tagged
     */
    private void doTreeTag(JCas jCas) {
        try {
            if (this.ttProc == null) {
                this.ttProc = new TreeTaggerProcess(this.ttprops.getTreeTaggingProcess());
            }
            Logger.printDetail(this.component, "TreeTagger (pos tagging) with: " + this.ttprops.parFileName);

            // Snapshot the tokens up front; texts and tokens stay aligned by position.
            ArrayList<String> tokenTexts = new ArrayList<String>();
            ArrayList<Token> tokens = new ArrayList<Token>();
            Iterator tokenIterator = jCas.getAnnotationIndex(Token.type).iterator();
            while (tokenIterator.hasNext()) {
                Token token = (Token) tokenIterator.next();
                tokenTexts.add(token.getCoveredText());
                tokens.add(token);
            }

            this.ttreader = new TreeTaggerReader(tokens, this.ttProc.getStdout(), jCas, this.annotate_sentences);
            this.ttwriter = new TreeTaggerWriter(tokenTexts, this.ttProc.getStdin());
            Thread readerThread = new Thread(this.ttreader);
            Thread writerThread = new Thread(this.ttwriter);
            readerThread.start();
            writerThread.start();
            readerThread.join();
            writerThread.join();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Preserve the interruption for code further up the call stack.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * File-based part-of-speech tagging: writes the token texts to a temporary
     * file, runs a TreeTagger process on that file and assigns each output
     * line as the part of speech of the next non-empty token. When sentence
     * annotation is enabled, sentences are closed at sentence-ending tags and
     * at EMPTYLINE marker tokens.
     *
     * NOTE(review): no call to this method is visible in this file; it looks
     * like a predecessor of doTreeTag - confirm before relying on it.
     *
     * @param jCas document whose tokens are tagged
     */
    private void doTreeTagOld(JCas jCas) {
        File file = null;
        // All indexed tokens in iteration order, including zero-length ones.
        ArrayList arrayList = new ArrayList();
        try {
            file = File.createTempFile("postokens", null);
            BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
            Iterator it = jCas.getAnnotationIndex(Token.type).iterator();
            while (it.hasNext()) {
                Token token = (Token) it.next();
                arrayList.add(token);
                // Only tokens with a non-empty span are written to the tagger input.
                if (token.getBegin() != token.getEnd()) {
                    bufferedWriter.write(token.getCoveredText() + this.ttprops.newLineSeparator);
                }
            }
            bufferedWriter.close();
        } catch (IOException e) {
            // NOTE(review): the writer is not closed on this path; the JVM is terminated right after.
            Logger.printError("Something went wrong creating a temporary file for the treetagger to process.");
            System.exit(-1);
        }
        // Output values that mark the end of a sentence (see the contains() check below).
        HashSet hashSet = new HashSet();
        hashSet.add("SENT");
        hashSet.add("$.");
        hashSet.add("FS");
        hashSet.add("_Z_Fst");
        hashSet.add("_Z_Int");
        hashSet.add("_Z_Exc");
        hashSet.add("ew");
        try {
            try {
                Process treeTaggingProcess = this.ttprops.getTreeTaggingProcess(file);
                Logger.printDetail(this.component, "TreeTagger (pos tagging) with: " + this.ttprops.parFileName);
                BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(treeTaggingProcess.getInputStream(), "UTF-8"));
                // Sentence currently being built, or null when none is open.
                Sentence sentence = null;
                // Index of the next token in arrayList to consume.
                int i = 0;
                while (true) {
                    String readLine = bufferedReader.readLine();
                    if (readLine == null) {
                        break;
                    }
                    int i2 = i;
                    i++;
                    Token token2 = (Token) arrayList.get(i2);
                    // Skip tokens without text; they were not written to the tagger input.
                    // NOTE(review): get() can run past the end of the list here; that would be
                    // caught by the catch (Exception) below - confirm this is acceptable.
                    while (token2.getCoveredText().equals("")) {
                        // An EMPTYLINE marker closes the sentence that is currently open.
                        if (this.annotate_sentences.booleanValue() && token2.getPos() != null && token2.getPos().equals("EMPTYLINE")) {
                            if (sentence == null) {
                                sentence = new Sentence(jCas);
                                sentence.setBegin(token2.getBegin());
                            }
                            sentence.setEnd(token2.getEnd());
                            // Only sentences with a non-empty span are indexed.
                            if (sentence.getBegin() < sentence.getEnd()) {
                                sentence.addToIndexes();
                            }
                            sentence = null;
                        }
                        token2.removeFromIndexes();
                        int i3 = i;
                        i++;
                        token2 = (Token) arrayList.get(i3);
                    }
                    // Remove the token, set its part of speech to the output line, then index it again.
                    token2.removeFromIndexes();
                    if (!token2.getCoveredText().equals("")) {
                        token2.setPos(readLine);
                        token2.addToIndexes();
                    }
                    if (this.annotate_sentences.booleanValue()) {
                        if (sentence == null) {
                            sentence = new Sentence(jCas);
                            sentence.setBegin(token2.getBegin());
                        }
                        // Close the sentence at a sentence-ending value or at the last token.
                        if (hashSet.contains(readLine) || i == arrayList.size()) {
                            sentence.setEnd(token2.getEnd());
                            sentence.addToIndexes();
                            sentence = null;
                        }
                    }
                }
                // Handle tokens left over after the tagger output ended.
                while (i < arrayList.size()) {
                    // NOTE(review): sentence is not reset to null here, so an open sentence is
                    // passed to addToIndexes() once per remaining token - confirm intent.
                    if (sentence != null) {
                        sentence.setEnd(((Token) arrayList.get(arrayList.size() - 1)).getEnd());
                        sentence.addToIndexes();
                    }
                    int i4 = i;
                    i++;
                    Token token3 = (Token) arrayList.get(i4);
                    if (token3.getPos() != null && token3.getPos().equals("EMPTYLINE")) {
                        token3.removeFromIndexes();
                    }
                }
                bufferedReader.close();
                treeTaggingProcess.destroy();
                file.delete();
            } catch (Exception e2) {
                // NOTE(review): reader and process are not released on this path.
                e2.printStackTrace();
                file.delete();
            }
        } catch (Throwable th) {
            // The temporary file is deleted on every exit path.
            file.delete();
            throw th;
        }
    }

    /**
     * Sets the TreeTagger root directory used to locate its files.
     *
     * @param str root directory of the TreeTagger installation
     */
    public void setHome(String str) {
        final TreeTaggerProperties properties = this.ttprops;
        properties.rootPath = str;
    }

    /**
     * Repairs French sentence boundaries that were wrongly placed after the
     * abbreviations "av.", "apr." or "avant." (any of them capitalized as
     * well) when the following sentence starts with "J.-C.", "J-C." or
     * "NSJC": both sentences are replaced by one sentence spanning them.
     *
     * <p>The pass over the sentence index is repeated until a pass merges
     * nothing. Index modifications are collected during a pass and applied
     * afterwards so the index is not changed while it is iterated.
     *
     * @param jCas document whose sentence annotations are corrected
     */
    private void improveFrenchSentences(JCas jCas) {
        HashSet<Sentence> toRemove = new HashSet<Sentence>();
        HashSet<Sentence> toAdd = new HashSet<Sentence>();
        HashSet<String> beginnings = new HashSet<String>();
        beginnings.add("J.-C.");
        beginnings.add("J-C.");
        beginnings.add("NSJC");

        boolean changed = true;
        while (changed) {
            changed = false;
            FSIterator sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
            while (sentences.hasNext()) {
                Sentence current = (Sentence) sentences.next();
                if (!endsWithFrenchEraAbbreviation(current.getCoveredText()) || !sentences.hasNext()) {
                    continue;
                }
                // Peek at the following sentence, then step back so it is visited as "current" next.
                Sentence following = (Sentence) sentences.next();
                sentences.moveToPrevious();

                String followingText = following.getCoveredText();
                // Bounded iteration: simply move on when no beginning matches.
                for (String beginning : beginnings) {
                    if (followingText.startsWith(beginning)) {
                        Sentence merged = new Sentence(jCas);
                        merged.setBegin(current.getBegin());
                        merged.setEnd(following.getEnd());
                        toAdd.add(merged);
                        toRemove.add(current);
                        toRemove.add(following);
                        changed = true;
                        break;
                    }
                }
            }

            for (Sentence obsolete : toRemove) {
                obsolete.removeFromIndexes(jCas);
            }
            toRemove.clear();
            for (Sentence merged : toAdd) {
                merged.addToIndexes(jCas);
            }
            toAdd.clear();
        }
    }

    /** Tells whether the text ends with one of the abbreviations that must not end a sentence. */
    private static boolean endsWithFrenchEraAbbreviation(String text) {
        return text.endsWith("av.") || text.endsWith("Av.")
                || text.endsWith("apr.") || text.endsWith("Apr.")
                || text.endsWith("avant.") || text.endsWith("Avant.");
    }

    /**
     * Merges German sentences that were split where no boundary belongs.
     *
     * <p>Each pair of consecutive sentences is examined. The pair is marked
     * for merging when the last two tokens of the first sentence and the first
     * token of the second sentence match one of the part-of-speech rules
     * (CARD, "$."-like tag, NN or NE), or when the second sentence starts with
     * a lower-case ASCII letter or a slash. Marked sentences are grouped, and
     * every group is replaced by one sentence covering all of its members.
     *
     * @param jCas document whose sentence annotations are corrected
     */
    private void improveGermanSentences(JCas jCas) {
        // Each rule: {POS of penultimate token, POS of last token, POS of first token of the next sentence}.
        HashSet<String[]> posRules = new HashSet<String[]>();
        posRules.add(new String[]{"CARD", "\\$.", "NN"});
        posRules.add(new String[]{"CARD", "\\$.", "NE"});

        Iterator sentenceIterator = jCas.getAnnotationIndex(Sentence.type).iterator();
        HashSet<HashSet<Sentence>> mergeGroups = new HashSet<HashSet<Sentence>>();
        Sentence current = null;
        while (sentenceIterator.hasNext()) {
            Sentence previous = current;
            current = (Sentence) sentenceIterator.next();
            if (previous == null) {
                // First sentence: there is no predecessor to compare with yet.
                continue;
            }

            Token firstOfCurrent = null;
            FSIterator currentTokens = jCas.getAnnotationIndex(Token.type).subiterator(current);
            if (currentTokens.hasNext()) {
                firstOfCurrent = (Token) currentTokens.next();
            }

            // Walk the previous sentence to find its last two tokens.
            Token penultimate = null;
            Token last = null;
            FSIterator previousTokens = jCas.getAnnotationIndex(Token.type).subiterator(previous);
            while (previousTokens.hasNext()) {
                penultimate = last;
                last = (Token) previousTokens.next();
            }

            if (penultimate == null || last == null || firstOfCurrent == null) {
                continue;
            }

            for (String[] rule : posRules) {
                boolean ruleMatches = penultimate.getPos() != null && penultimate.getPos().matches(rule[0])
                        && last.getPos() != null && last.getPos().matches(rule[1])
                        && firstOfCurrent.getPos() != null && firstOfCurrent.getPos().matches(rule[2]);
                if (!(ruleMatches || firstOfCurrent.getCoveredText().matches("^[a-z/].*"))) {
                    continue;
                }

                // Extend an existing group that already holds one of the two sentences ...
                boolean joinedExistingGroup = false;
                for (HashSet<Sentence> group : mergeGroups) {
                    if (group.contains(current) || group.contains(previous)) {
                        group.add(previous);
                        group.add(current);
                        joinedExistingGroup = true;
                        break;
                    }
                }
                // ... or start a new group for this pair.
                if (!joinedExistingGroup) {
                    HashSet<Sentence> group = new HashSet<Sentence>();
                    group.add(previous);
                    group.add(current);
                    mergeGroups.add(group);
                }
            }
        }

        // Replace every group by a single sentence spanning all of its members.
        for (HashSet<Sentence> group : mergeGroups) {
            int begin = Integer.MAX_VALUE;
            int end = Integer.MIN_VALUE;
            Sentence merged = new Sentence(jCas);
            for (Sentence member : group) {
                begin = Math.min(begin, member.getBegin());
                end = Math.max(end, member.getEnd());
                member.removeFromIndexes();
            }
            merged.setBegin(begin);
            merged.setEnd(end);
            merged.addToIndexes();
        }
    }

    /**
     * Shuts down the TreeTagger process held by this wrapper, if any.
     *
     * <p>Does nothing when no process is held, so the method is safe to call
     * when no tagging took place and safe to call repeatedly.
     */
    public void quit() {
        if (this.ttProc == null) {
            return;
        }
        try {
            this.ttProc.close();
        } finally {
            // Drop the reference even if closing fails, so a later run starts a fresh process.
            this.ttProc = null;
        }
    }
}
