package hr.fer.zemris.takelab.uima.annotator.hunpos;

import de.unihd.dbs.uima.annotator.heideltime.resources.Language;
import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import hr.fer.zemris.takelab.splitter.TokenSplitter;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.impl.RootUimaContext_impl;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.impl.ConfigurationManager_impl;
import org.apache.uima.resource.impl.ResourceManager_impl;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:hr/fer/zemris/takelab/uima/annotator/hunpos/HunPosTaggerWrapper.class */
public class HunPosTaggerWrapper extends JCasAnnotator_ImplBase {
    /** Name of the configuration parameter holding the language name. */
    public static final String PARAM_LANGUAGE = "language";
    /** Name of the configuration parameter holding the HunPos installation directory. */
    public static final String PARAM_PATH = "hunpos_path";
    /** Name of the configuration parameter holding the model path (resolved relative to the HunPos directory). */
    public static final String PARAM_MODEL_PATH = "model_path";
    /** Name of the boolean configuration parameter that switches token annotation on or off. */
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    /** Name of the boolean configuration parameter that switches sentence annotation on or off. */
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    /** Name of the boolean configuration parameter that switches part-of-speech annotation on or off. */
    public static final String PARAM_ANNOTATE_POS = "annotate_pos";
    /** Language resolved from the "language" parameter; Croatian triggers the sentence post-processing step. */
    private Language language;
    /** Whether process() runs the token splitter. */
    private boolean annotate_tokens;
    /** Whether sentence annotations are created while POS tagging. */
    private boolean annotate_sentences;
    /** Whether process() runs the HunPos tagger. */
    private boolean annotate_pos;

    /* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:hr/fer/zemris/takelab/uima/annotator/hunpos/HunPosTaggerWrapper$HunPosTaggerContext.class */
    /**
     * Minimal root UIMA context that carries this annotator's six configuration
     * parameters, so the wrapper can be initialized programmatically without an
     * analysis engine descriptor.
     */
    private class HunPosTaggerContext extends RootUimaContext_impl {
        /**
         * Builds the context and stores every parameter under its qualified name.
         *
         * @param language          language whose name is stored under "language"
         * @param hunposPath        value stored under "hunpos_path"
         * @param modelPath         value stored under "model_path"
         * @param annotateTokens    value stored under "annotate_tokens"
         * @param annotateSentences value stored under "annotate_sentences"
         * @param annotatePos       value stored under "annotate_pos"
         */
        public HunPosTaggerContext(Language language, String hunposPath, String modelPath, Boolean annotateTokens, Boolean annotateSentences, Boolean annotatePos) {
            final ConfigurationManager_impl config = new ConfigurationManager_impl();
            final ResourceManager_impl resources = new ResourceManager_impl();
            initializeRoot(null, resources, config);
            // The configuration manager needs the session before values can be set.
            config.setSession(getSession());

            final String languageName = language.getName();
            config.setConfigParameterValue(makeQualifiedName("language"), languageName);
            config.setConfigParameterValue(makeQualifiedName("model_path"), modelPath);
            config.setConfigParameterValue(makeQualifiedName("hunpos_path"), hunposPath);
            config.setConfigParameterValue(makeQualifiedName("annotate_tokens"), annotateTokens);
            config.setConfigParameterValue(makeQualifiedName("annotate_pos"), annotatePos);
            config.setConfigParameterValue(makeQualifiedName("annotate_sentences"), annotateSentences);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:hr/fer/zemris/takelab/uima/annotator/hunpos/HunPosTaggerWrapper$HunPosWrapper.class */
    public static class HunPosWrapper {
        private static List<String> command;
        public static final String HUNPOS_HOME = "HUNPOS_HOME";

        private HunPosWrapper() {
        }

        public static void initialize(String str) {
            initialize(str, null);
        }

        public static void initialize(String str, String str2) {
            String str3 = str2;
            if (str3 == null) {
                str3 = System.getenv(HUNPOS_HOME);
            }
            if (str3 == null || !new File(str3).exists()) {
                Logger.printError(HunPosWrapper.class, "The environment variable HUNPOS_HOME was not set, or set to \"" + str3 + "\", which does not exist.");
                System.exit(-1);
            }
            File file = new File(str3);
            command = new ArrayList();
            command.add(str3 + "/hunpos-tag");
            File file2 = new File(file, str);
            if (file2.exists()) {
                command.add(file2.getAbsolutePath());
            } else {
                Logger.printError(HunPosWrapper.class, "The supplied model path " + str + " does not exist.");
                System.exit(-1);
            }
        }

        public static void tagPOS(JCas jCas, boolean z) {
            Process process = null;
            String[] strArr = new String[command.size()];
            command.toArray(strArr);
            try {
                process = Runtime.getRuntime().exec(strArr);
            } catch (IOException e) {
                Logger.printError(HunPosWrapper.class, "An error occured while trying to call HunPos at " + System.getenv(HUNPOS_HOME));
                e.printStackTrace();
            }
            OutputStreamWriter outputStreamWriter = new OutputStreamWriter(process.getOutputStream());
            Logger.printDetail(HunPosWrapper.class, "Starting the POS tagging process.");
            ArrayList arrayList = new ArrayList();
            Iterator it = jCas.getAnnotationIndex(Token.type).iterator();
            while (it.hasNext()) {
                arrayList.add((Token) it.next());
            }
            Thread thread = new Thread(new Runnable(jCas, arrayList, z, process.getInputStream()) { // from class: hr.fer.zemris.takelab.uima.annotator.hunpos.HunPosTaggerWrapper.HunPosWrapper.1TaggingJob
                private JCas jCas;
                private List<Token> tokens;
                private boolean tagSentences;
                private InputStream input;
                private final Pattern HUNPOS_PATTERN = Pattern.compile("^(.+)\t([^\t]+)$");
                private final String terminal = "Z";
                private HunPosAnnotionTranslator trans = new HunPosAnnotionTranslator();

                {
                    this.jCas = jCas;
                    this.tokens = arrayList;
                    this.tagSentences = z;
                    this.input = r8;
                }

                @Override // java.lang.Runnable
                public void run() {
                    Scanner scanner = new Scanner(new InputStreamReader(new BufferedInputStream(this.input), Charset.forName("UTF-8")));
                    int i = 0;
                    Sentence sentence = null;
                    while (scanner.hasNextLine()) {
                        try {
                            String trim = scanner.nextLine().trim();
                            if (!trim.isEmpty()) {
                                int i2 = i;
                                i++;
                                Token token = this.tokens.get(i2);
                                while (token.getCoveredText().isEmpty()) {
                                    token.setPos("");
                                    token.addToIndexes();
                                    int i3 = i;
                                    i++;
                                    token = this.tokens.get(i3);
                                }
                                Matcher matcher = this.HUNPOS_PATTERN.matcher(trim);
                                if (matcher.find()) {
                                    trim = matcher.group(2);
                                } else {
                                    i--;
                                }
                                token.removeFromIndexes();
                                token.setPos(this.trans.translate(trim));
                                token.addToIndexes();
                                if (this.tagSentences) {
                                    if (sentence == null) {
                                        sentence = new Sentence(this.jCas);
                                        sentence.setBegin(token.getBegin());
                                    }
                                    if ("Z".equals(trim) || i == this.tokens.size()) {
                                        sentence.setEnd(token.getEnd());
                                        sentence.addToIndexes();
                                        sentence = null;
                                    }
                                }
                            }
                        } catch (Exception e2) {
                            e2.printStackTrace();
                            return;
                        }
                    }
                    scanner.close();
                }
            });
            thread.start();
            Iterator it2 = arrayList.iterator();
            while (it2.hasNext()) {
                try {
                    outputStreamWriter.write(((Token) it2.next()).getCoveredText() + "\n");
                } catch (IOException e2) {
                    e2.printStackTrace();
                }
            }
            try {
                outputStreamWriter.close();
            } catch (IOException e3) {
                e3.printStackTrace();
            }
            try {
                thread.join();
                process.waitFor();
            } catch (InterruptedException e4) {
                e4.printStackTrace();
            }
        }
    }

    /* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:hr/fer/zemris/takelab/uima/annotator/hunpos/HunPosTaggerWrapper$TokenSplitterWrapper.class */
    /**
     * Static helper that turns the output of {@link TokenSplitter} into
     * {@link Token} annotations on a JCas.
     */
    private static class TokenSplitterWrapper {
        private TokenSplitterWrapper() {
        }

        /**
         * Splits the document text into tokens and indexes one {@link Token}
         * per piece. Each piece is searched for from the end of the previous
         * token onwards, so offsets only ever move forward.
         *
         * @param jCas CAS whose document text is tokenized
         * @throws RuntimeException if a piece returned by the splitter cannot be
         *                          located in the document text
         */
        public static void splitTokens(JCas jCas) {
            int searchFrom = 0;
            for (String piece : TokenSplitter.getTokens(jCas.getDocumentText())) {
                final int begin = jCas.getDocumentText().indexOf(piece, searchFrom);
                if (begin < 0) {
                    throw new RuntimeException("Opps! Could not find token " + piece + " in JCas after tokenizing with token splitter for Croatian. Hmm, there may exist a charset missmatch! Default encoding is " + Charset.defaultCharset().name() + " and should always be UTF-8.");
                }
                final Token annotation = new Token(jCas);
                annotation.setBegin(begin);
                annotation.setEnd(annotation.getBegin() + piece.length());
                annotation.addToIndexes();
                searchFrom = annotation.getEnd();
            }
        }
    }

    /**
     * Programmatic initialization: wraps the arguments in a
     * {@link HunPosTaggerContext} and delegates to {@link #initialize(UimaContext)}.
     *
     * @param language language to tag
     * @param str      HunPos directory (stored as "hunpos_path")
     * @param str2     model path (stored as "model_path")
     * @param bool     whether to annotate tokens
     * @param bool2    whether to annotate sentences
     * @param bool3    whether to annotate parts of speech
     */
    public void initialize(Language language, String str, String str2, Boolean bool, Boolean bool2, Boolean bool3) {
        final HunPosTaggerContext context = new HunPosTaggerContext(language, str, str2, bool, bool2, bool3);
        initialize(context);
    }

    /**
     * Reads the annotator configuration from the given context and prepares
     * the HunPos command line.
     *
     * @param uimaContext context providing the "annotate_tokens",
     *                    "annotate_sentences", "annotate_pos", "language",
     *                    "model_path" and "hunpos_path" parameters
     */
    @Override // org.apache.uima.analysis_component.AnalysisComponent_ImplBase, org.apache.uima.analysis_component.AnalysisComponent
    public void initialize(UimaContext uimaContext) {
        this.annotate_tokens = readBooleanParameter(uimaContext, "annotate_tokens");
        this.annotate_sentences = readBooleanParameter(uimaContext, "annotate_sentences");
        this.annotate_pos = readBooleanParameter(uimaContext, "annotate_pos");

        final String languageName = (String) uimaContext.getConfigParameterValue("language");
        this.language = Language.getLanguageFromString(languageName);

        final String modelPath = (String) uimaContext.getConfigParameterValue("model_path");
        final String hunposPath = (String) uimaContext.getConfigParameterValue("hunpos_path");
        HunPosWrapper.initialize(modelPath, hunposPath);
    }

    /** Fetches a Boolean configuration parameter and unboxes it; a missing value raises a NullPointerException. */
    private static boolean readBooleanParameter(UimaContext uimaContext, String parameterName) {
        return ((Boolean) uimaContext.getConfigParameterValue(parameterName)).booleanValue();
    }

    /**
     * Runs the configured steps in order: tokenization, POS tagging (which
     * also creates sentences when enabled) and, for Croatian only, the
     * sentence-merging post-processing.
     *
     * @param jCas CAS to annotate
     */
    @Override // org.apache.uima.analysis_component.JCasAnnotator_ImplBase
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (this.annotate_tokens) {
            TokenSplitterWrapper.splitTokens(jCas);
        }

        if (this.annotate_pos) {
            HunPosWrapper.tagPOS(jCas, this.annotate_sentences);
        }

        // Sentence repair applies to Croatian documents only.
        if (Language.CROATIAN != this.language) {
            return;
        }
        fixCroatianSentences(jCas);
    }

    /**
     * Sentence text that starts with an upper-case letter (including Croatian
     * capitals). No DOTALL flag: text containing a line break never matches.
     */
    private static final Pattern CROATIAN_UPPERCASE_START = Pattern.compile("^[A-ZŠĐČĆŽ].*");

    /** Sentence text that starts with a Croatian month name (same line-break caveat as above). */
    private static final Pattern CROATIAN_MONTH_START = Pattern.compile("^(Siječ(anj|nja)|Veljač[ae]|Ožuj(ak|ka)|Trav(anj|nja)|Svib(anj|nja)|Lip(anj|nja)|Srp(anj|nja)|Kolovoza?|Ruj(an|na)|Listopada?|Studen(i|og)|Prosin(ac|ca)).*");

    /** Sentence text that ends with one to four digits followed by a full stop. */
    private static final Pattern CROATIAN_NUMBER_DOT_END = Pattern.compile("(?s).*\\d{1,4}\\.$");

    /** Sentence text that ends with a single-capital abbreviation ("X.") or with one of : ; , % " ( ) - */
    private static final Pattern CROATIAN_OPEN_END = Pattern.compile("(?s)^.*(\\s[A-Z]\\.|[:;,%\"\\(\\)\\-])$");

    /**
     * Merges sentences that were split too eagerly. A sentence is joined with
     * its predecessor when
     * <ul>
     *   <li>an earlier sentence ended with an abbreviation-like or punctuation
     *       ending ({@link #CROATIAN_OPEN_END}), or</li>
     *   <li>an earlier sentence ended with "number." and the current one either
     *       does not start with an upper-case letter or starts with a month
     *       name.</li>
     * </ul>
     * All existing sentence annotations are then removed from the indexes and
     * replaced by the merged set.
     *
     * @param jCas CAS whose sentence annotations are rewritten
     */
    private void fixCroatianSentences(JCas jCas) {
        FSIterator<?> sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
        HashSet<Sentence> result = new HashSet<Sentence>();

        boolean afterNumberDot = false;
        boolean afterOpenEnd = false;
        Sentence previous = null;
        while (sentences.hasNext()) {
            Sentence current = (Sentence) sentences.next();
            String text = current.getCoveredText();

            boolean merge = afterOpenEnd;
            if (!merge && afterNumberDot) {
                merge = !CROATIAN_UPPERCASE_START.matcher(text).matches()
                        || CROATIAN_MONTH_START.matcher(text).matches();
            }

            if (merge) {
                // Either flag being set implies an earlier iteration assigned 'previous'.
                Sentence merged = new Sentence(jCas);
                merged.setBegin(previous.getBegin());
                merged.setEnd(current.getEnd());
                result.remove(previous);
                result.add(merged);
                afterNumberDot = false;
                afterOpenEnd = false;
                previous = merged;
                text = merged.getCoveredText();
            } else {
                result.add(current);
                previous = current;
            }

            // NOTE(review): the flags are only cleared on a merge, so afterNumberDot
            // stays set across a sentence that was not merged. Kept as in the original;
            // confirm whether it should be recomputed for every sentence.
            if (CROATIAN_NUMBER_DOT_END.matcher(text).matches()) {
                afterNumberDot = true;
            }
            if (CROATIAN_OPEN_END.matcher(text).matches()) {
                afterOpenEnd = true;
            }
        }

        // Snapshot the indexed sentences first so the index is not modified
        // while it is being iterated.
        List<Sentence> indexed = new ArrayList<Sentence>();
        sentences.moveToFirst();
        while (sentences.hasNext()) {
            indexed.add((Sentence) sentences.next());
        }
        for (Sentence stale : indexed) {
            stale.removeFromIndexes(jCas);
        }
        for (Sentence kept : result) {
            kept.addToIndexes(jCas);
        }
    }
}
