package de.unihd.dbs.uima.annotator.jvntextprowrapper;

import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger;
import de.unihd.dbs.uima.types.heideltime.Token;
import edu.stanford.nlp.time.SUTime;
import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import jmaxent.Classification;
import jvnpostag.POSContextGenerator;
import jvnpostag.POSDataReader;
import jvnsegmenter.CRFSegmenter;
import jvnsensegmenter.JVnSenSegmenter;
import jvntextpro.JVnTextPro;
import jvntextpro.conversion.CompositeUnicode2Unicode;
import jvntextpro.data.DataReader;
import jvntextpro.data.Sentence;
import jvntextpro.data.TWord;
import jvntextpro.data.TaggingData;
import jvntextpro.util.StringUtils;
import jvntokenizer.PennTokenizer;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.eclipse.jgit.transport.WalkEncryption;

/* loaded from: input_file:BOOT-INF/lib/heideltime-2.2.1.jar:de/unihd/dbs/uima/annotator/jvntextprowrapper/JVnTextProWrapper.class */
/**
 * UIMA annotator that runs the JVnTextPro sentence segmenter, word segmenter and
 * POS tagger over the document text and writes the results into the CAS as
 * HeidelTime {@code Sentence} and {@code Token} annotations.
 *
 * <p>Which annotations are produced is controlled by the three boolean
 * configuration parameters; the three path parameters select the model
 * directories handed to the JVnTextPro components.
 */
public class JVnTextProWrapper extends JCasAnnotator_ImplBase {
    public static final String PARAM_SENTSEGMODEL_PATH = "sent_model_path";
    public static final String PARAM_WORDSEGMODEL_PATH = "word_model_path";
    public static final String PARAM_POSMODEL_PATH = "pos_model_path";
    public static final String PARAM_ANNOTATE_TOKENS = "annotate_tokens";
    public static final String PARAM_ANNOTATE_SENTENCES = "annotate_sentences";
    public static final String PARAM_ANNOTATE_PARTOFSPEECH = "annotate_partofspeech";

    /** Regex used to split the segmented text into tokens. */
    private static final String WHITESPACE_REGEX = "\\s+";
    /** Classifier label that is rewritten by {@link #jvnTagging(String)}. */
    private static final String MARK_LABEL = "Mrk";
    /** Tag assigned to a {@link #MARK_LABEL} word that is not punctuation. */
    private static final String UNKNOWN_TAG = "X";

    private Class<?> component = getClass();
    private boolean annotate_tokens = false;
    private boolean annotate_sentences = false;
    private boolean annotate_partofspeech = false;
    private String sentModelPath = null;
    private String wordModelPath = null;
    private String posModelPath = null;
    JVnSenSegmenter vnSenSegmenter = new JVnSenSegmenter();
    CRFSegmenter vnSegmenter = new CRFSegmenter();
    DataReader reader = new POSDataReader();
    TaggingData dataTagger = new TaggingData();
    Classification classifier = null;

    /**
     * Reads the configuration parameters and initializes every JVnTextPro
     * component for which a model path was supplied.
     *
     * <p>A model that fails to load is logged and terminates the JVM via
     * {@code System.exit(-1)}.
     * NOTE(review): exiting the JVM from a component is harsh, and this override
     * does not call {@code super.initialize(uimaContext)}; both are kept as-is to
     * preserve existing behavior - confirm whether throwing a
     * ResourceInitializationException would be preferable.
     *
     * @param uimaContext context providing the configuration parameter values
     */
    @Override
    public void initialize(UimaContext uimaContext) {
        this.annotate_tokens = readFlag(uimaContext, PARAM_ANNOTATE_TOKENS);
        this.annotate_sentences = readFlag(uimaContext, PARAM_ANNOTATE_SENTENCES);
        this.annotate_partofspeech = readFlag(uimaContext, PARAM_ANNOTATE_PARTOFSPEECH);
        this.sentModelPath = (String) uimaContext.getConfigParameterValue(PARAM_SENTSEGMODEL_PATH);
        this.wordModelPath = (String) uimaContext.getConfigParameterValue(PARAM_WORDSEGMODEL_PATH);
        this.posModelPath = (String) uimaContext.getConfigParameterValue(PARAM_POSMODEL_PATH);

        if (this.sentModelPath != null && !this.vnSenSegmenter.init(this.sentModelPath)) {
            abortInitialization("Error initializing the sentence segmenter model: " + this.sentModelPath, null);
        }
        if (this.wordModelPath != null) {
            try {
                this.vnSegmenter.init(this.wordModelPath);
            } catch (Exception e) {
                abortInitialization("Error initializing the word segmenter model: " + this.wordModelPath, e);
            }
        }
        if (this.posModelPath != null) {
            try {
                this.dataTagger.addContextGenerator(
                        new POSContextGenerator(this.posModelPath + File.separator + "featuretemplate.xml"));
                this.classifier = new Classification(this.posModelPath);
            } catch (Exception e) {
                abortInitialization("Error initializing the POS tagging model: " + this.posModelPath, e);
            }
        }
    }

    /**
     * Segments the document text and adds sentence and/or token annotations to
     * the CAS, depending on the configured flags.
     *
     * <p>Word segmentation and POS tagging are only run when token annotation is
     * enabled, because their output is not used otherwise.
     *
     * @param jCas the CAS holding the document text and receiving the annotations
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String documentText = jCas.getDocumentText();
        if (documentText == null) {
            // Nothing to annotate without document text.
            return;
        }
        String converted = new CompositeUnicode2Unicode().convert(documentText);
        String sentenceText = this.vnSenSegmenter.senSegment(converted).trim();

        if (this.annotate_sentences) {
            annotateSentences(jCas, documentText, sentenceText);
        }
        if (this.annotate_tokens) {
            String tokenized = PennTokenizer.tokenize(sentenceText).trim();
            String segmentedText = new JVnTextPro().postProcessing(this.vnSegmenter.segmenting(tokenized)).trim();
            annotateTokens(jCas, documentText, segmentedText);
        }
    }

    /** Reads a boolean configuration parameter, treating a missing value as {@code false}. */
    private static boolean readFlag(UimaContext uimaContext, String name) {
        Boolean value = (Boolean) uimaContext.getConfigParameterValue(name);
        return value != null && value.booleanValue();
    }

    /** Logs an initialization failure (including its cause, if any) and terminates the JVM. */
    private void abortInitialization(String message, Exception cause) {
        Logger.printError(this.component, cause == null ? message : message + " (" + cause + ")");
        System.exit(-1);
    }

    /** Adds one Sentence annotation per non-empty line of the sentence-segmented text. */
    private void annotateSentences(JCas jCas, String documentText, String sentenceText) {
        int offset = 0;
        for (String line : sentenceText.split("\n")) {
            String sentence = line.trim();
            if (sentence.isEmpty()) {
                // An empty line would otherwise yield a zero-length annotation.
                continue;
            }
            int begin = documentText.indexOf(sentence, offset);
            if (begin < 0) {
                // Fallback: retry without the last character of the segmented sentence.
                sentence = sentence.substring(0, sentence.length() - 1).trim();
                begin = sentence.isEmpty() ? -1 : documentText.indexOf(sentence, offset);
            }
            if (begin < 0) {
                System.err.println("Sentence \"" + sentence + "\" was not found in the original text.");
                continue;
            }
            de.unihd.dbs.uima.types.heideltime.Sentence annotation =
                    new de.unihd.dbs.uima.types.heideltime.Sentence(jCas);
            annotation.setBegin(begin);
            offset = begin + sentence.length();
            annotation.setEnd(offset);
            annotation.addToIndexes();
        }
    }

    /**
     * Adds one Token annotation per whitespace-separated token of the segmented
     * text, locating each token in the original document text.
     */
    private void annotateTokens(JCas jCas, String documentText, String segmentedText) {
        // Flattened word/tag pairs, index-aligned with the whitespace split below.
        List<TWord> taggedWords = new ArrayList<TWord>();
        if (this.annotate_partofspeech) {
            for (Sentence sentence : jvnTagging(segmentedText)) {
                for (int i = 0; i < sentence.size(); i++) {
                    taggedWords.add(sentence.getTWordAt(i));
                }
            }
        }

        int offset = 0;
        String[] tokens = segmentedText.split(WHITESPACE_REGEX);
        for (int i = 0; i < tokens.length; i++) {
            String tokenText = tokens[i].trim();
            if (tokenText.isEmpty()) {
                // Splitting an empty text yields a single empty string.
                continue;
            }

            String posTag = null;
            if (i < taggedWords.size()) {
                TWord tagged = taggedWords.get(i);
                if (tokenText.equals(tagged.getWord())) {
                    posTag = tagged.getTag();
                } else {
                    System.err.println("Couldn't match token: " + tokenText
                            + " to expected word/tag combination " + tagged.getWord());
                }
            }

            // First try the token exactly as the segmenter returned it.
            int begin = documentText.indexOf(tokenText, offset);
            int length = tokenText.length();
            if (begin < 0) {
                // Otherwise try the token with '_' turned into a space or removed,
                // and take whichever variant occurs first (the spaced one on a tie).
                String spaced = tokenText.replaceAll("_", " ");
                String joined = tokenText.replaceAll("_", "");
                int spacedAt = documentText.indexOf(spaced, offset);
                int joinedAt = documentText.indexOf(joined, offset);
                if (spacedAt >= 0 && (joinedAt < 0 || spacedAt <= joinedAt)) {
                    begin = spacedAt;
                    length = spaced.length();
                } else if (joinedAt >= 0) {
                    begin = joinedAt;
                    length = joined.length();
                }
            }
            if (begin < 0) {
                System.err.println("Token \"" + tokenText + "\" was not found in the original text.");
                continue;
            }

            Token token = new Token(jCas);
            token.setBegin(begin);
            offset = begin + length;
            token.setEnd(offset);
            sanitizeToken(token, jCas);
            if (this.annotate_partofspeech) {
                token.setPos(posTag);
            }
            token.addToIndexes();
        }
    }

    /**
     * Splits a leading and/or trailing punctuation character off a multi-character
     * token into its own Token and shrinks the given token accordingly, repeating
     * until nothing more can be split off.
     *
     * @param token the token to shrink in place; it is not added to the indexes here
     * @param jCas  the CAS in which the punctuation tokens are created
     * @return {@code true} if at least one punctuation token was split off
     */
    private boolean sanitizeToken(Token token, JCas jCas) {
        boolean changed = false;

        String covered = token.getCoveredText();
        if (covered.length() > 1 && covered.matches("^\\p{Punct}.*")) {
            int begin = token.getBegin();
            Token leading = new Token(jCas);
            leading.setBegin(begin);
            leading.setEnd(begin + 1);
            token.setBegin(begin + 1);
            if (this.annotate_partofspeech) {
                // The punctuation character itself is used as its tag.
                leading.setPos("" + covered.charAt(0));
            }
            if (this.annotate_tokens) {
                leading.addToIndexes();
            }
            changed = true;
        }

        covered = token.getCoveredText();
        if (covered.length() > 1 && covered.matches(".*\\p{Punct}$")) {
            int end = token.getEnd();
            Token trailing = new Token(jCas);
            trailing.setBegin(end - 1);
            trailing.setEnd(end);
            token.setEnd(end - 1);
            if (this.annotate_partofspeech) {
                trailing.setPos("" + covered.charAt(covered.length() - 1));
            }
            if (this.annotate_tokens) {
                trailing.addToIndexes();
            }
            changed = true;
        }

        if (changed) {
            // More punctuation may be exposed after stripping one character per side.
            sanitizeToken(token, jCas);
        }
        return changed;
    }

    /**
     * Reads the given text into sentences and assigns a tag to every word using
     * the POS classifier.
     *
     * <p>A word classified as {@code "Mrk"} (case-insensitive) is tagged with the
     * word itself when {@code StringUtils.isPunc} accepts it, and with
     * {@code "X"} otherwise.
     *
     * @param str the text to tag
     * @return the sentences read from {@code str}, with tags set on their words
     * @throws IllegalStateException if no POS model has been initialized
     */
    public List<Sentence> jvnTagging(String str) {
        if (this.classifier == null) {
            throw new IllegalStateException(
                    "POS tagging requested but no POS model was initialized (" + PARAM_POSMODEL_PATH + ")");
        }
        List<Sentence> sentences = this.reader.readString(str);
        for (Sentence sentence : sentences) {
            for (int i = 0; i < sentence.size(); i++) {
                String label = this.classifier.classify(this.dataTagger.getContext(sentence, i));
                if (MARK_LABEL.equalsIgnoreCase(label)) {
                    String word = sentence.getWordAt(i);
                    label = StringUtils.isPunc(word) ? word : UNKNOWN_TAG;
                }
                sentence.getTWordAt(i).setTag(label);
            }
        }
        return sentences;
    }
}
