package edu.stanford.nlp.ie.regexp;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.time.TimeExpressionExtractor;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PaddedList;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.5.6.jar:edu/stanford/nlp/ie/regexp/ChineseNumberSequenceClassifier.class */
/**
 * Rule-based sequence classifier that labels Chinese tokens as {@code NUMBER}, {@code DATE},
 * {@code TIME}, {@code MONEY}, {@code ORDINAL} or {@code PERCENT}.
 *
 * <p>Decisions are driven by each token's part-of-speech annotation ({@code OD}, {@code CD},
 * {@code NT}, and {@code M}/{@code NN}/{@code NNS} for currency words) combined with the
 * regular expressions and word lists declared below. Tokens that match no rule receive the
 * background symbol taken from the inherited {@code flags}.
 *
 * <p>The classifier has no trainable or serializable state: {@link #train},
 * {@link #serializeClassifier(String)}, {@link #serializeClassifier(ObjectOutputStream)} and
 * {@link #loadClassifier(ObjectInputStream, Properties)} are all no-ops. SUTime is not used; when
 * it is requested only a warning is logged.
 */
public class ChineseNumberSequenceClassifier extends AbstractSequenceClassifier<CoreLabel> {
    private static final boolean DEBUG = false;

    /** Whether SUTime was requested at construction time; only ever used to emit warnings. */
    private final boolean useSUTime;

    // The SUTime-related constants below are not referenced anywhere inside this class.
    public static final boolean USE_SUTIME_DEFAULT = false;
    public static final String USE_SUTIME_PROPERTY = "ner.useSUTime";
    public static final String USE_SUTIME_PROPERTY_BASE = "useSUTime";
    public static final String SUTIME_PROPERTY = "sutime";

    /** Always {@code null}: the constructor never creates an extractor. */
    private final TimeExpressionExtractor timexExtractor;

    // Labels written into AnswerAnnotation by classify().
    public static final String NUMBER_TAG = "NUMBER";
    public static final String DATE_TAG = "DATE";
    public static final String TIME_TAG = "TIME";
    public static final String MONEY_TAG = "MONEY";
    public static final String ORDINAL_TAG = "ORDINAL";
    public static final String PERCENT_TAG = "PERCENT";

    /**
     * A two-character numeral directly followed by this word is labelled DATE, and so is the
     * word itself.
     */
    private static final String DATE_AGE_LOCALIZER = "后";

    private static final Redwood.RedwoodChannels log = Redwood.channels(ChineseNumberSequenceClassifier.class);

    /** Whole-token pattern for currency words; decides the MONEY label. */
    public static final Pattern CURRENCY_WORD_PATTERN = Pattern.compile("元|刀|(?:美|欧|澳|加|日|韩)元|英?镑|法郎|卢比|卢布|马克|先令|克朗|泰?铢|(?:越南)?盾|美分|便士|块钱|毛钱|角钱");

    /** Tokens starting with 百分之 / 千分之 followed by at least one more character. */
    public static final Pattern PERCENT_WORD_PATTERN1 = Pattern.compile("(?:百分之|千分之).+");

    /** Tokens ending in a percent sign. */
    public static final Pattern PERCENT_WORD_PATTERN2 = Pattern.compile(".+%");

    // Whole-token patterns; an NT token matching any of them is labelled DATE.
    public static final Pattern DATE_PATTERN1 = Pattern.compile(".+(?:年代?|月份?|日|号|世纪)");
    public static final Pattern DATE_PATTERN2 = Pattern.compile("(?:星期|周|礼拜).+");
    public static final Pattern DATE_PATTERN3 = Pattern.compile("[0-9一二三四五六七八九零〇十]{2,4}");
    public static final Pattern DATE_PATTERN4 = Pattern.compile("(?:[0-9]{2,4}[/\\-\\.][0-9]+[/\\-\\.][0-9]+|[0-9]+[/\\-\\.][0-9]+[/\\-\\.][0-9]{2,4}|[0-9]+[/\\-\\.]?[0-9]+)");
    public static final Pattern DATE_PATTERN5 = Pattern.compile("[昨今明][天晨晚夜早]");

    /**
     * Clock-time pattern. Kept as part of the public API; {@link #classify(List)} does not need
     * to evaluate it because every NT token that is not a date is labelled TIME regardless.
     */
    public static final Pattern TIME_PATTERN1 = Pattern.compile(".+(?::|点|时)(?:过|欠|差)?(?:.+(?::|分)?|整?|钟?|.+刻)?(?:.+秒?)");

    /** One or more Chinese or Arabic numeral characters. */
    private static final Pattern CHINESE_AND_ARABIC_NUMERALS_PATTERN = Pattern.compile("[一二三四五六七八九零十〇\\d]+");

    /** Currency word list; not referenced anywhere inside this class. */
    public static final String[] CURRENCY_WORDS_VALUES = {"越南盾", "美元", "欧元", "澳元", "加元", "日元", "韩元", "英镑", "法郎", "卢比", "卢布", "马克", "先令", "克朗", "泰铢", "盾", "铢", "刀", "镑", "元"};

    /** Words that make an NT token a DATE even when no date pattern matches. */
    public static final String[] DATE_WORDS_VALUES = {"明天", "后天", "昨天", "前天", "明年", "后年", "去年", "前年", "昨日", "明日", "来年", "上月", "本月", "目前", "今后", "未来", "日前", "最近", "当时", "后来", "那时", "这时", "今", "今天", "当今", "如今", "之后", "当代", "以前", "现在", "将来", "此时", "此前", "元旦"};
    public static final HashSet<String> DATE_WORDS = new HashSet<>(Arrays.asList(DATE_WORDS_VALUES));

    /**
     * Time-of-day word list. Kept as part of the public API; like {@link #TIME_PATTERN1} it
     * cannot change the outcome of {@link #classify(List)}.
     */
    public static final String[] TIME_WORDS_VALUES = {"早晨", "清晨", "凌晨", "上午", "中午", "下午", "傍晚", "晚上", "夜间", "晨间", "晚间", "午前", "午后", "早", "晚"};
    public static final HashSet<String> TIME_WORDS = new HashSet<>(Arrays.asList(TIME_WORDS_VALUES));

    /** Creates a classifier with empty properties and SUTime not requested. */
    public ChineseNumberSequenceClassifier() {
        this(new Properties(), false, new Properties());
    }

    /**
     * Creates a classifier with empty properties.
     *
     * @param z whether SUTime was requested; {@code true} only triggers a warning
     */
    public ChineseNumberSequenceClassifier(boolean z) {
        this(new Properties(), z, new Properties());
    }

    /**
     * Creates a classifier.
     *
     * @param properties passed to the superclass constructor
     * @param z whether SUTime was requested; {@code true} only triggers a warning
     * @param properties2 accepted but never read by this constructor
     */
    public ChineseNumberSequenceClassifier(Properties properties, boolean z, Properties properties2) {
        super(properties);
        this.useSUTime = z;
        if (this.useSUTime) {
            log.warn("SUTime currently does not support Chinese. Ignore property ner.useSUTime.");
        }
        this.timexExtractor = null;
    }

    /**
     * Writes an {@code AnswerAnnotation} onto every token of {@code list}, in place.
     *
     * <p>Rules are tried in this order; the first one that applies wins:
     * <ol>
     *   <li>POS {@code OD}: ORDINAL.</li>
     *   <li>Currency word whose previous token has POS {@code CD}: MONEY.</li>
     *   <li>POS {@code CD}: PERCENT, MONEY, DATE or NUMBER (see {@code labelForCardinal}).</li>
     *   <li>POS {@code NT}: DATE when a date pattern or date word matches, otherwise TIME.</li>
     *   <li>The word 后 preceded by a two-character numeral: DATE.</li>
     *   <li>Anything else: the background symbol.</li>
     * </ol>
     *
     * <p>A token whose word is {@code null} is treated as having an empty word, so it can still
     * be labelled from its POS tag but never matches a surface-form rule.
     *
     * @param list tokens to label; annotations are modified in place
     * @return the same {@code list} instance that was passed in
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public List<CoreLabel> classify(List<CoreLabel> list) {
        // The padded view returns the pad label for out-of-range indices, so the neighbours of
        // the first and last token can be read without explicit bounds checks.
        PaddedList<CoreLabel> padded = new PaddedList<>(list, this.pad);
        int size = padded.size();
        for (int i = 0; i < size; i++) {
            CoreLabel current = padded.get(i);
            CoreLabel previous = padded.get(i - 1);
            CoreLabel following = padded.get(i + 1);

            // Default label; overwritten below when a rule applies.
            current.set(CoreAnnotations.AnswerAnnotation.class, this.flags.backgroundSymbol);

            String tag = current.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
            String rawWord = current.word();
            String word = rawWord == null ? "" : rawWord;

            if ("OD".equals(tag)) {
                current.set(CoreAnnotations.AnswerAnnotation.class, ORDINAL_TAG);
            } else if ("CD".equals(previous.getString(CoreAnnotations.PartOfSpeechAnnotation.class))
                    && CURRENCY_WORD_PATTERN.matcher(word).matches()) {
                current.set(CoreAnnotations.AnswerAnnotation.class, MONEY_TAG);
            } else if ("CD".equals(tag)) {
                current.set(CoreAnnotations.AnswerAnnotation.class, labelForCardinal(padded, i, word, following));
            } else if ("NT".equals(tag)) {
                // Every temporal token that is not recognised as a date defaults to TIME, so
                // TIME_PATTERN1 / TIME_WORDS never need to be consulted.
                current.set(CoreAnnotations.AnswerAnnotation.class, looksLikeDate(word) ? DATE_TAG : TIME_TAG);
            } else if (DATE_AGE_LOCALIZER.equals(word) && isTwoCharNumeral(previous.word())) {
                current.set(CoreAnnotations.AnswerAnnotation.class, DATE_TAG);
            }
        }
        return list;
    }

    /**
     * Picks the label for a token whose POS tag is {@code CD}.
     *
     * @param tokens the token sequence being classified
     * @param index position of the token within {@code tokens}
     * @param word the token's word, never {@code null}
     * @param following the token after it (the pad label at the end of the sequence)
     * @return PERCENT, MONEY, DATE or NUMBER
     */
    private static String labelForCardinal(List<CoreLabel> tokens, int index, String word, CoreLabel following) {
        if (PERCENT_WORD_PATTERN1.matcher(word).matches() || PERCENT_WORD_PATTERN2.matcher(word).matches()) {
            return PERCENT_TAG;
        }
        if (rightScanFindsMoneyWord(tokens, index)) {
            return MONEY_TAG;
        }
        if (isTwoCharNumeral(word) && DATE_AGE_LOCALIZER.equals(following.word())) {
            return DATE_TAG;
        }
        return NUMBER_TAG;
    }

    /** Returns whether {@code word} matches any date pattern or is a listed date word. */
    private static boolean looksLikeDate(String word) {
        return DATE_PATTERN1.matcher(word).matches()
                || DATE_PATTERN2.matcher(word).matches()
                || DATE_PATTERN3.matcher(word).matches()
                || DATE_PATTERN4.matcher(word).matches()
                || DATE_PATTERN5.matcher(word).matches()
                || DATE_WORDS.contains(word);
    }

    /**
     * Returns whether {@code word} is non-null, exactly two chars long and consists only of
     * Chinese/Arabic numeral characters.
     */
    private static boolean isTwoCharNumeral(String word) {
        return word != null && word.length() == 2 && CHINESE_AND_ARABIC_NUMERALS_PATTERN.matcher(word).matches();
    }

    /**
     * Skips the run of {@code CD}-tagged tokens starting at {@code start} and reports whether the
     * first token after that run is a currency word tagged {@code M}, {@code NN} or {@code NNS}.
     *
     * @param tokens the token sequence
     * @param start index at which scanning begins
     * @return {@code true} if the run is immediately followed by a currency word; {@code false}
     *     otherwise, including when the run reaches the end of the sequence or the candidate
     *     token has no word
     */
    private static boolean rightScanFindsMoneyWord(List<CoreLabel> tokens, int start) {
        int size = tokens.size();
        int cursor = start;
        while (cursor < size && "CD".equals(tokens.get(cursor).getString(CoreAnnotations.PartOfSpeechAnnotation.class))) {
            cursor++;
        }
        if (cursor >= size) {
            return false;
        }
        CoreLabel candidate = tokens.get(cursor);
        String tag = candidate.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (!("M".equals(tag) || "NN".equals(tag) || "NNS".equals(tag))) {
            return false;
        }
        String word = candidate.word();
        return word != null && CURRENCY_WORD_PATTERN.matcher(word).matches();
    }

    /**
     * Delegates to {@link #classify(List)}; the two context maps are ignored. Logs a warning on
     * every call when SUTime was requested.
     *
     * @param list tokens to label in place
     * @param coreMap ignored
     * @param coreMap2 ignored
     * @return the same {@code list} instance that was passed in
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public List<CoreLabel> classifyWithGlobalInformation(List<CoreLabel> list, CoreMap coreMap, CoreMap coreMap2) {
        if (this.useSUTime) {
            log.warn("Warning: ChineseNumberSequenceClassifier does not have SUTime implementation.");
        }
        return classify(list);
    }

    /** No-op: this classifier is purely rule-based. */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void train(Collection<List<CoreLabel>> collection, DocumentReaderAndWriter<CoreLabel> documentReaderAndWriter) {
    }

    /** No-op: nothing is written. */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void serializeClassifier(String str) {
    }

    /** No-op: nothing is written. */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void serializeClassifier(ObjectOutputStream objectOutputStream) {
    }

    /** No-op: nothing is read from the stream. */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void loadClassifier(ObjectInputStream objectInputStream, Properties properties) throws IOException, ClassCastException, ClassNotFoundException {
    }

    /** Empty entry point. */
    public static void main(String[] strArr) throws IOException {
    }
}
