package de.unibamberg.minf.gtf.extensions.nlp.stanford;

import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;
import java.util.ListIterator;
import java.util.Properties;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;

/* loaded from: input_file:de/unibamberg/minf/gtf/extensions/nlp/stanford/MaxentTaggerWrapper.class */
/**
 * UIMA annotator and Spring bean that wraps a Stanford {@link MaxentTagger}.
 *
 * <p>The tagger model is resolved as a class-path resource, with {@code modelsPath} added as an
 * extra lookup location. Initialization is best-effort: configuration problems are logged and the
 * wrapper stays uninitialized, so the next call to {@link #processSentence(String)} or
 * {@link #process(JCas)} retries. Those methods throw {@link IllegalStateException} when no tagger
 * could be created.
 */
public class MaxentTaggerWrapper extends JCasAnnotator_ImplBase implements InitializingBean {
    private static final Logger logger = LoggerFactory.getLogger(MaxentTaggerWrapper.class);

    /** Tokenizer options shared by every tokenization performed by this wrapper. */
    private static final String TOKENIZER_OPTIONS = "ptb3Escaping=false,untokenizable=noneKeep";

    // NOTE(review): this value is stored but not read anywhere in this class; init() hard-codes
    // "tokenize.language" to "de".
    private String language;
    private String modelsPath;
    private String taggerModelPath;
    private String classifierModelPath;
    private String lexParseModelPath;
    private MaxentTagger tagger;
    private boolean autoInit = true;
    private boolean initialized;

    /** Returns the file-system location that is added to the lookup path for model resources. */
    public String getModelsPath() {
        return this.modelsPath;
    }

    /** Sets the file-system location that is added to the lookup path for model resources. */
    public void setModelsPath(String str) {
        this.modelsPath = str;
    }

    /** Returns the configured language. */
    public String getLanguage() {
        return this.language;
    }

    /** Sets the configured language. */
    public void setLanguage(String str) {
        this.language = str;
    }

    /** Returns the resource name of the tagger model. */
    public String getTaggerModelPath() {
        return this.taggerModelPath;
    }

    /** Sets the resource name of the tagger model. */
    public void setTaggerModelPath(String str) {
        this.taggerModelPath = str;
    }

    /** Returns the classifier model path passed to the tagger as {@code ner.model}. */
    public String getClassifierModelPath() {
        return this.classifierModelPath;
    }

    /** Sets the classifier model path passed to the tagger as {@code ner.model}. */
    public void setClassifierModelPath(String str) {
        this.classifierModelPath = str;
    }

    /** Returns the parser model path passed to the tagger as {@code lex.model}. */
    public String getLexParseModelPath() {
        return this.lexParseModelPath;
    }

    /** Sets the parser model path passed to the tagger as {@code lex.model}. */
    public void setLexParseModelPath(String str) {
        this.lexParseModelPath = str;
    }

    /** Returns whether {@link #afterPropertiesSet()} triggers {@link #init()}. */
    public boolean isAutoInit() {
        return this.autoInit;
    }

    /** Sets whether {@link #afterPropertiesSet()} triggers {@link #init()}. */
    public void setAutoInit(boolean z) {
        this.autoInit = z;
    }

    /**
     * Initializes the tagger once all properties are set, if auto-initialization is enabled.
     *
     * <p>The {@code initialized} flag is owned by {@link #init()} alone, so a failed
     * initialization is never recorded as a success.
     */
    public void afterPropertiesSet() {
        if (this.autoInit) {
            init();
        }
    }

    /**
     * Loads the tagger model and creates the {@link MaxentTagger}.
     *
     * <p>Does nothing when already initialized. Any configuration or loading problem is logged
     * and leaves the wrapper uninitialized; no exception is propagated to the caller.
     */
    public void init() {
        if (this.initialized) {
            return;
        }
        String modelsDir = getModelsPath();
        if (modelsDir == null || !new File(modelsDir).exists()) {
            logger.error("Cannot initialize MaxentTaggerWrapper. Models path not set or does not exist: {}", modelsDir);
            return;
        }
        String taggerModel = getTaggerModelPath();
        if (taggerModel == null) {
            logger.error("Cannot initialize MaxentTaggerWrapper. Tagger model path not set.");
            return;
        }
        try {
            URL modelsUrl = new File(modelsDir).toURI().toURL();
            URL resource = URLClassLoader.newInstance(new URL[] {modelsUrl}, getClass().getClassLoader())
                    .getResource(taggerModel);
            if (resource == null) {
                logger.error("Cannot initialize MaxentTaggerWrapper. Tagger model {} could not be resolved (models path: {}).",
                        taggerModel, modelsDir);
                return;
            }
            Properties properties = new Properties();
            properties.setProperty("tokenize.language", "de");
            properties.setProperty("pos.model", taggerModel);
            // Properties rejects null values, so unset optional model paths are skipped.
            putIfSet(properties, "ner.model", getClassifierModelPath());
            putIfSet(properties, "lex.model", getLexParseModelPath());
            properties.setProperty("ner.applyNumericClassifiers", "false");
            properties.setProperty("ner.useSUTime", "false");
            try {
                this.tagger = new MaxentTagger(resource.toString(), properties, true);
                this.initialized = true;
            } catch (Exception e) {
                // Broad catch is deliberate: model loading is a best-effort boundary.
                logger.error("Failed to initialize MaxentTaggerWrapper.", e);
            }
        } catch (IOException e2) {
            logger.error("Failed to initialize POS tagger wrapper", e2);
        }
    }

    /**
     * Tokenizes the given text and tags the resulting tokens as a single sentence.
     *
     * @param str the text to tag; must not be {@code null}
     * @return the tagged words, in token order
     * @throws IllegalStateException if the tagger could not be initialized
     */
    public List<TaggedWord> processSentence(String str) {
        MaxentTagger activeTagger = requireTagger();
        return activeTagger.tagSentence(tokenize(str));
    }

    /**
     * Tokenizes and tags the document text of the given CAS, adding one {@link Token} per token
     * that can be located in the text and a single {@link Sentence} covering the document.
     *
     * <p>A CAS without document text is left unannotated.
     *
     * @param jCas the CAS to annotate
     * @throws IllegalStateException if the tagger could not be initialized
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        MaxentTagger activeTagger = requireTagger();
        String documentText = jCas.getDocumentText();
        if (documentText == null) {
            // Nothing to tokenize; avoids a NullPointerException from StringReader.
            return;
        }
        List<? extends HasWord> tokens = tokenize(documentText);
        ListIterator<TaggedWord> tags = activeTagger.tagSentence(tokens).listIterator();

        // Cursor into the document text; tokens are searched for from here onwards.
        int cursor = 0;
        Sentence sentence = new Sentence(jCas);
        sentence.setBegin(cursor);
        int indexedTokens = 0;
        for (HasWord hasWord : tokens) {
            // Always advance the tag iterator so tags stay aligned with tokens.
            String tag = tags.next().tag();
            String word = hasWord.word();
            int begin = documentText.indexOf(word, cursor);
            if (begin < 0) {
                logger.info("A previously tagged token wasn't found in the document text: \"{}\". This may be due to unpredictable punctuation tokenization; hence this token isn't tagged.", word);
                continue;
            }
            Token token = new Token(jCas);
            token.setPos(tag);
            token.setBegin(begin);
            cursor = begin + word.length();
            token.setEnd(cursor);
            token.addToIndexes();
            indexedTokens++;
        }
        // NOTE(review): with at least one indexed token the sentence ends one character before
        // the last token's end offset. Kept as-is for compatibility - confirm this is intended.
        if (indexedTokens == 0) {
            sentence.setEnd(cursor);
        } else {
            sentence.setEnd(cursor - 1);
        }
        sentence.addToIndexes();
    }

    /** Returns the tagger, initializing lazily, or fails with a clear message if unavailable. */
    private MaxentTagger requireTagger() {
        if (!this.initialized) {
            init();
        }
        if (this.tagger == null) {
            throw new IllegalStateException(
                    "MaxentTaggerWrapper is not initialized; check the models path and tagger model configuration.");
        }
        return this.tagger;
    }

    /** Tokenizes the text with the PTB tokenizer using this wrapper's fixed options. */
    private static List<? extends HasWord> tokenize(String text) {
        TokenizerFactory<? extends HasWord> factory = PTBTokenizer.PTBTokenizerFactory.newTokenizerFactory();
        factory.setOptions(TOKENIZER_OPTIONS);
        return factory.getTokenizer(new StringReader(text)).tokenize();
    }

    /** Stores the property only when a value is configured. */
    private static void putIfSet(Properties properties, String key, String value) {
        if (value != null) {
            properties.setProperty(key, value);
        }
    }
}
