package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.5.6.jar:edu/stanford/nlp/pipeline/MWTAnnotator.class */
/**
 * Annotator that splits multi-word tokens (MWTs) into their component words.
 *
 * <p>Expansions are looked up in up to two tab-separated mapping files:
 * <ul>
 *   <li>a dictionary mapping keyed by the lower-cased surface form of the token
 *       (property {@code <prefix>mappingFile});</li>
 *   <li>a "statistical" mapping keyed by {@code word-tag} (lower-cased), which is only
 *       consulted when property {@code <prefix>pos.model} is set; in that case a
 *       {@link POSTaggerAnnotator} is run first to supply the tags
 *       (property {@code <prefix>statisticalMappingFile}).</li>
 * </ul>
 * When both mappings contain an entry for a token, the dictionary entry wins.
 *
 * <p>{@code <prefix>} is {@code "mwt."} when the annotator name is null or empty and
 * {@code name + ".mwt."} otherwise.
 */
public class MWTAnnotator implements Annotator {
    private final boolean useDictionary;
    private final boolean preserveCasing;
    private final Annotator statisticalMWTAnnotator;
    private final boolean useStatisticalModel;
    private final HashMap<String, List<String>> multiWordTokenMapping = new HashMap<>();
    private final HashMap<String, List<String>> statisticalMultiWordTokenMapping = new HashMap<>();

    /**
     * Builds the annotator from configuration properties.
     *
     * @param str annotator name used to derive the property prefix; may be null or empty
     * @param properties configuration; see the class comment for the keys that are read
     * @throws IllegalArgumentException if {@code pos.model} is configured without a
     *         {@code statisticalMappingFile}, or if a mapping file contains a malformed line
     */
    public MWTAnnotator(String str, Properties properties) {
        String prefix = (str == null || str.equals("")) ? "mwt." : str + ".mwt.";

        String mappingFile = properties.getProperty(prefix + "mappingFile", "");
        this.useDictionary = !mappingFile.equals("");
        if (this.useDictionary) {
            loadMultiWordTokenMappings(this.multiWordTokenMapping, mappingFile);
        }

        this.useStatisticalModel = !properties.getProperty(prefix + "pos.model", "").equals("");
        if (this.useStatisticalModel) {
            String statisticalMappingFile = properties.getProperty(prefix + "statisticalMappingFile");
            if (statisticalMappingFile == null) {
                // Fail with a message naming the missing key instead of passing null to the file loader.
                throw new IllegalArgumentException("Property '" + prefix + "pos.model' is set but '"
                        + prefix + "statisticalMappingFile' is missing");
            }
            // NOTE(review): the tagger is always created under the fixed name "mwt.pos", regardless of
            // the prefix computed above. Kept as-is; confirm whether a custom prefix should apply here.
            this.statisticalMWTAnnotator = new POSTaggerAnnotator("mwt.pos", properties);
            loadMultiWordTokenMappings(this.statisticalMultiWordTokenMapping, statisticalMappingFile);
        } else {
            this.statisticalMWTAnnotator = null;
        }

        this.preserveCasing = PropertiesUtils.getBool(properties, prefix + "preserveCasing", true);
    }

    /**
     * Reads a mapping file into {@code hashMap}. Each non-blank line must have the form
     * {@code key<TAB>word1,word2,...}; the key and every word are lower-cased before being stored.
     * Columns after the second are ignored. Blank lines are skipped.
     *
     * @param hashMap map that receives the entries (existing keys are overwritten)
     * @param str path of the mapping file, passed to {@link IOUtils#linesFromFile(String)}
     * @throws IllegalArgumentException if a non-blank line has fewer than two tab-separated columns
     */
    public void loadMultiWordTokenMappings(HashMap<String, List<String>> hashMap, String str) {
        for (String line : IOUtils.linesFromFile(str)) {
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] columns = line.split("\t");
            if (columns.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed line in multi-word token mapping file " + str + ": \"" + line + "\"");
            }
            List<String> words = Arrays.stream(columns[1].split(","))
                    .map(String::toLowerCase)
                    .collect(Collectors.toList());
            hashMap.put(columns[0].toLowerCase(), words);
        }
    }

    /**
     * Rebuilds the token lists of every sentence and of the document, replacing each token that has
     * an expansion of more than one word with one new token per word. Token begin/end indices are
     * recomputed for tokens and sentences, and per-sentence token indices are renumbered from 1.
     * If the statistical model is enabled, the part-of-speech tags it added are removed at the end.
     *
     * @param annotation document to process; its sentence and token annotations are replaced in place
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public void annotate(Annotation annotation) {
        if (this.useStatisticalModel) {
            // Tags are needed to build the "word-tag" lookup keys.
            this.statisticalMWTAnnotator.annotate(annotation);
        }

        List<CoreLabel> documentTokens = new ArrayList<>();
        int sentenceIndex = 0;
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, documentTokens.size());
            List<CoreLabel> sentenceTokens = new ArrayList<>();
            int tokenIndex = 1;
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                List<String> words = lookupExpansion(token);
                if (words.size() > 1) {
                    if (this.preserveCasing) {
                        words = applyCasing(token.word(), words);
                    }
                    boolean first = true;
                    for (String word : words) {
                        CoreLabel piece = buildSubToken(token, word, first, documentTokens.size(),
                                tokenIndex, sentenceIndex);
                        first = false;
                        sentenceTokens.add(piece);
                        documentTokens.add(piece);
                        tokenIndex++;
                    }
                } else {
                    // Not a multi-word token: keep a copy with refreshed indices.
                    CoreLabel copy = new CoreLabel(token);
                    copy.set(CoreAnnotations.TokenBeginAnnotation.class, documentTokens.size());
                    copy.set(CoreAnnotations.TokenEndAnnotation.class, documentTokens.size() + 1);
                    copy.setIndex(tokenIndex);
                    copy.setIsMWT(false);
                    copy.setIsMWTFirst(false);
                    sentenceTokens.add(copy);
                    documentTokens.add(copy);
                    tokenIndex++;
                }
            }
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, documentTokens.size());
            sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
            sentenceIndex++;
        }
        annotation.set(CoreAnnotations.TokensAnnotation.class, documentTokens);

        if (this.useStatisticalModel) {
            // The tags were only needed for the lookup; strip them from the output tokens.
            for (CoreLabel token : documentTokens) {
                token.remove(CoreAnnotations.PartOfSpeechAnnotation.class);
            }
        }
    }

    /**
     * Returns a mutable copy of the expansion for {@code token}, or an empty list if none is known.
     * The dictionary mapping takes precedence over the statistical mapping.
     */
    private List<String> lookupExpansion(CoreLabel token) {
        String lowerWord = token.word().toLowerCase();
        List<String> words = new ArrayList<>();
        if (this.useStatisticalModel) {
            String key = String.format("%s-%s", lowerWord, token.tag()).toLowerCase();
            List<String> statistical = this.statisticalMultiWordTokenMapping.get(key);
            if (statistical != null) {
                words = new ArrayList<>(statistical);
            }
        }
        if (this.useDictionary) {
            List<String> dictionary = this.multiWordTokenMapping.get(lowerWord);
            if (dictionary != null) {
                words = new ArrayList<>(dictionary);
            }
        }
        return words;
    }

    /**
     * Carries the casing of the original token over to its expansion: an all-upper-case token yields
     * upper-cased words, a title-case token yields a title-cased first word; otherwise unchanged.
     */
    private static List<String> applyCasing(String originalWord, List<String> words) {
        if (StringUtils.isAllUpperCase(originalWord)) {
            return words.stream().map(String::toUpperCase).collect(Collectors.toList());
        }
        if (StringUtils.isTitleCase(originalWord)) {
            words.set(0, StringUtils.toTitleCase(words.get(0)));
        }
        return words;
    }

    /**
     * Creates the token for one word of an expanded multi-word token. Character offsets, before/after
     * text and the parent annotation (if present) are taken from the original token.
     */
    private static CoreLabel buildSubToken(CoreLabel original, String word, boolean first,
                                           int tokenBegin, int tokenIndex, int sentenceIndex) {
        CoreLabel piece = new CoreLabel();
        piece.setWord(word);
        piece.setValue(word);
        piece.setOriginalText(word);
        piece.setIsNewline(false);
        if (original.keySet().contains(CoreAnnotations.ParentAnnotation.class)) {
            piece.set(CoreAnnotations.ParentAnnotation.class,
                    original.get(CoreAnnotations.ParentAnnotation.class));
        }
        piece.set(CoreAnnotations.TokenBeginAnnotation.class, tokenBegin);
        piece.set(CoreAnnotations.TokenEndAnnotation.class, tokenBegin + 1);
        piece.setBeginPosition(original.beginPosition());
        piece.setEndPosition(original.endPosition());
        piece.setBefore(original.before());
        piece.setAfter(original.after());
        piece.set(CoreAnnotations.MWTTokenTextAnnotation.class, original.word());
        piece.setIsMWT(true);
        piece.setIsMWTFirst(first);
        piece.setIndex(tokenIndex);
        piece.setSentIndex(sentenceIndex);
        return piece;
    }

    /** Annotations that must already be present: tokens, character offsets and sentences. */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
                CoreAnnotations.TokensAnnotation.class,
                CoreAnnotations.CharacterOffsetBeginAnnotation.class,
                CoreAnnotations.CharacterOffsetEndAnnotation.class,
                CoreAnnotations.SentencesAnnotation.class)));
    }

    /** Annotations this annotator declares as provided: MWT token text and the is-MWT flag. */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
                CoreAnnotations.MWTTokenTextAnnotation.class,
                CoreAnnotations.IsMultiWordTokenAnnotation.class)));
    }
}
