package de.unihd.dbs.uima.annotator.stanfordtagger;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import java.io.FileInputStream;
import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Properties;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/annotator/stanfordtagger/StanfordPOSTaggerWrapper.class */
/**
 * UIMA annotator that tokenizes the document text with the Stanford PTB tokenizer, tags each
 * sentence with a {@link MaxentTagger}, and (depending on configuration) adds {@link Token} and
 * {@link Sentence} annotations to the CAS indexes.
 *
 * <p>Configuration parameters are read in {@link #initialize(UimaContext)}:
 * <ul>
 *   <li>{@link #PARAM_MODEL_PATH} - path of the tagger model; required.</li>
 *   <li>{@link #PARAM_CONFIG_PATH} - path of a properties file for the tagger; optional.</li>
 *   <li>{@link #PARAM_ANNOTATE_TOKENS}, {@link #PARAM_ANNOTATE_SENTENCES},
 *       {@link #PARAM_ANNOTATE_PARTOFSPEECH} - boolean switches; a missing value is treated as
 *       {@code false}.</li>
 * </ul>
 *
 * <p>NOTE(review): fatal configuration or index problems terminate the JVM via
 * {@code System.exit(-1)}. That behavior is preserved here because callers may rely on it, but it
 * is hostile to embedding applications and should be revisited.
 */
public class StanfordPOSTaggerWrapper extends JCasAnnotator_ImplBase {
    /** Name of the configuration parameter holding the tagger model path. */
    public static final String PARAM_MODEL_PATH = "model_path";
    /** Name of the configuration parameter holding the optional tagger properties file path. */
    public static final String PARAM_CONFIG_PATH = "config_path";
    /** Name of the boolean configuration parameter that enables indexing of tokens. */
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    /** Name of the boolean configuration parameter that enables indexing of sentences. */
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    /** Name of the boolean configuration parameter that enables setting the POS tag on tokens. */
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";
    private String model_path;
    private String config_path;
    private MaxentTagger mt;
    private Class<?> component = getClass();
    private boolean annotate_tokens = false;
    private boolean annotate_sentences = false;
    private boolean annotate_partofspeech = false;

    /**
     * Reads the configuration parameters and instantiates the tagger.
     *
     * <p>If the model path is missing, or the tagger cannot be created, an error is logged and
     * the JVM is terminated with exit code -1.
     *
     * @param uimaContext context from which the configuration parameters are read
     */
    @Override
    public void initialize(UimaContext uimaContext) {
        this.annotate_tokens = readFlag(uimaContext, PARAM_ANNOTATE_TOKENS);
        this.annotate_sentences = readFlag(uimaContext, PARAM_ANNOTATE_SENTENCES);
        this.annotate_partofspeech = readFlag(uimaContext, PARAM_ANNOTATE_PARTOFSPEECH);
        this.model_path = (String) uimaContext.getConfigParameterValue(PARAM_MODEL_PATH);
        this.config_path = (String) uimaContext.getConfigParameterValue(PARAM_CONFIG_PATH);
        if (this.model_path == null) {
            Logger.printError(this.component, "The model file for the Stanford Tagger was not correctly specified.");
            System.exit(-1);
        }
        try {
            if (this.config_path != null) {
                Properties properties = new Properties();
                FileInputStream fileInputStream = new FileInputStream(this.config_path);
                try {
                    properties.load(fileInputStream);
                } finally {
                    // Always release the file handle, even when loading fails.
                    fileInputStream.close();
                }
                // The explicit Properties cast is kept so the same constructor overload is chosen as before.
                this.mt = new MaxentTagger(this.model_path, (Properties) new TaggerConfig(properties), false);
            } else {
                this.mt = new MaxentTagger(this.model_path, (Properties) new TaggerConfig("-model", this.model_path), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
            Logger.printError(this.component, "MaxentTagger could not be instantiated with the supplied model(" + this.model_path + ") and config(" + this.config_path + ") file.");
            System.exit(-1);
        }
    }

    /**
     * Tokenizes and tags the document text, then adds token and/or sentence annotations to the
     * CAS indexes according to the configured switches.
     *
     * <p>Token offsets are recovered by searching for each token's text in the document, starting
     * from the end of the previously matched token. Tokens whose text cannot be found are logged
     * and skipped.
     *
     * <p>After annotating, every indexed {@link Sentence} and {@link Token} is checked for
     * negative offsets; if one is found, diagnostics are printed to {@code System.err} and the
     * JVM is terminated with exit code -1.
     *
     * @param jCas the CAS to annotate; if it has no document text, nothing is done
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String documentText = jCas.getDocumentText();
        if (documentText == null) {
            // Nothing to tokenize; a StringReader cannot be built from null.
            return;
        }

        // Cursor into documentText: the end of the most recently matched token.
        int offset = 0;
        TokenizerFactory<Word> tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newTokenizerFactory();
        tokenizerFactory.setOptions("ptb3Escaping=false,untokenizable=noneKeep");
        for (List<HasWord> sentenceWords : MaxentTagger.tokenizeText(new StringReader(documentText), tokenizerFactory)) {
            ListIterator<TaggedWord> taggedWords = this.mt.tagSentence(sentenceWords).listIterator();
            Sentence sentence = new Sentence(jCas);
            // NOTE(review): the sentence begins at the cursor, i.e. at the end of the previously
            // matched token (or 0), not at the first token of this sentence. Kept as-is.
            sentence.setBegin(offset);
            int matchedWords = 0;
            for (HasWord hasWord : sentenceWords) {
                Token token = new Token(jCas);
                // Advance the tag iterator for every word so words and tags stay aligned.
                TaggedWord taggedWord = taggedWords.next();
                if (this.annotate_partofspeech) {
                    token.setPos(taggedWord.tag());
                }
                String word = hasWord.word();
                int begin = documentText.indexOf(word, offset);
                if (begin < 0) {
                    Logger.printDetail(this.component, "A previously tagged token wasn't found in the document text: \"" + word + "\". This may be due to unpredictable punctuation tokenization; hence this token isn't tagged.");
                    continue;
                }
                token.setBegin(begin);
                matchedWords++;
                offset = begin + word.length();
                token.setEnd(offset);
                if (this.annotate_tokens) {
                    token.addToIndexes();
                }
            }
            if (this.annotate_sentences) {
                // NOTE(review): when at least one token matched, the sentence ends one character
                // before the end of the last matched token. Kept as-is; confirm this is intended.
                if (matchedWords == 0) {
                    sentence.setEnd(offset);
                } else {
                    sentence.setEnd(offset - 1);
                }
                sentence.addToIndexes();
            }
        }

        // Sanity check: abort on any indexed sentence with a negative offset.
        Iterator<?> sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
        while (sentences.hasNext()) {
            Sentence indexedSentence = (Sentence) sentences.next();
            if (indexedSentence.getBegin() < 0 || indexedSentence.getEnd() < 0) {
                System.err.println("Sentence: " + indexedSentence.getBegin() + ":" + indexedSentence.getEnd() + " = " + indexedSentence.getCoveredText());
                System.err.println("wrong index in text: " + jCas.getDocumentText());
                System.exit(-1);
            }
        }

        // Sanity check: abort on any indexed token with a negative offset.
        Iterator<?> tokens = jCas.getAnnotationIndex(Token.type).iterator();
        while (tokens.hasNext()) {
            Token indexedToken = (Token) tokens.next();
            if (indexedToken.getBegin() < 0 || indexedToken.getEnd() < 0) {
                System.err.println("In text: " + jCas.getDocumentText());
                System.err.println("Token: " + indexedToken.getBegin() + ":" + indexedToken.getEnd());
                System.exit(-1);
            }
        }
    }

    /**
     * Reads a boolean configuration parameter, treating a missing value as {@code false} so that
     * later use of the flag cannot fail on a null reference.
     *
     * @param context context to read the parameter from
     * @param parameterName name of the boolean parameter
     * @return the configured value, or {@code false} if the parameter is not set
     */
    private static boolean readFlag(UimaContext context, String parameterName) {
        Boolean value = (Boolean) context.getConfigParameterValue(parameterName);
        return value != null && value.booleanValue();
    }
}
