package edu.stanford.nlp.process;

import java.util.regex.Pattern;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.5.6.jar:edu/stanford/nlp/process/LexerUtils.class */
/**
 * Static helpers that rewrite a single token string into a requested normalized form:
 * fractions, currency signs, ampersand entities, quotes, ellipses, dashes and parentheses.
 *
 * <p>Many patterns mention code points in the range U+0080 to U+009F. Those are the values
 * that CP1252 punctuation bytes take when the bytes were decoded as ISO-8859-1; the methods
 * here map them to the intended Unicode characters or to ASCII stand-ins.
 *
 * <p>All non-ASCII characters are written as Unicode escapes so that the class compiles the
 * same way regardless of the source-file encoding used by the compiler.
 */
public class LexerUtils {
    private static final String ptb3EllipsisStr = "...";
    private static final String unicodeEllipsisStr = "\u2026";
    private static final String PENN_LEFT_PAREN = "-LRB-";
    private static final String PENN_RIGHT_PAREN = "-RRB-";
    /** U+00A0, substituted for ordinary spaces inside ellipsis tokens. */
    private static final String NON_BREAKING_SPACE = "\u00A0";

    // Currency: cent sign, pound sign, generic/euro/rupee signs, and the CP1252 euro byte.
    private static final Pattern CENTS_PATTERN = Pattern.compile("\u00A2");
    private static final Pattern POUND_PATTERN = Pattern.compile("\u00A3");
    private static final Pattern GENERIC_CURRENCY_PATTERN = Pattern.compile("[\u0080\u00A4\u20A0\u20AC\u20B9]");
    private static final Pattern CP1252_EURO_PATTERN = Pattern.compile("\u0080");

    // Precomposed vulgar fractions.
    private static final Pattern ONE_FOURTH_PATTERN = Pattern.compile("\u00BC");
    private static final Pattern ONE_HALF_PATTERN = Pattern.compile("\u00BD");
    private static final Pattern THREE_FOURTHS_PATTERN = Pattern.compile("\u00BE");
    private static final Pattern ONE_THIRD_PATTERN = Pattern.compile("\u2153");
    private static final Pattern TWO_THIRDS_PATTERN = Pattern.compile("\u2154");

    private static final Pattern SINGLE_SPACE_PATTERN = Pattern.compile("[ ]");
    // Soft hyphen, zero-width non-joiner, zero-width joiner, word joiner.
    private static final Pattern NON_WORD_REMOVE_CHARS = Pattern.compile("[\u00ad\u200c\u200d\u2060]");
    private static final Pattern AMP_PATTERN = Pattern.compile("(?i:&amp;)");

    // Quote patterns. "single"/"double" below refer to the ASCII-ish spellings; the
    // left/right patterns refer to directional Unicode and CP1252 quote characters.
    private static final Pattern singleQuote = Pattern.compile("&apos;|'");
    private static final Pattern doubleQuote = Pattern.compile("\"|''|'`|`'|&quot;");
    private static final Pattern asciiSingleQuote =
            Pattern.compile("&apos;|[\u0082\u008b\u0091\u00B4\u2018\u0092\u2019\u009b\u201A\u201B\u2039\u203A']");
    private static final Pattern asciiDoubleQuote =
            Pattern.compile("&quot;|[\u0084\u0093\u201C\u0094\u201D\u201E\u00AB\u00BB\"]");
    private static final Pattern leftSingleQuote = Pattern.compile("[\u0082\u008b\u0091\u2018\u201A\u201B\u2039]");
    private static final Pattern rightSingleQuote = Pattern.compile("[\u0092\u009b\u00B4\u2019\u203A]");
    private static final Pattern leftDoubleQuote = Pattern.compile("[\u0084\u0093\u201C\u201E\u00AB]|[\u0091\u2018]'");
    private static final Pattern rightDoubleQuote = Pattern.compile("[\u0094\u201D\u00BB]|[\u0092\u2019]'");
    // Despite the names, these four match the CP1252 code points, which are then
    // replaced by the corresponding Unicode quotes in unicodeQuotes().
    private static final Pattern unicodeLeftSingleQuote = Pattern.compile("\u0091");
    private static final Pattern unicodeRightSingleQuote = Pattern.compile("\u0092");
    private static final Pattern unicodeLeftDoubleQuote = Pattern.compile("\u0093");
    private static final Pattern unicodeRightDoubleQuote = Pattern.compile("\u0094");
    private static final Pattern leftDuck = Pattern.compile("\u008b");
    private static final Pattern rightDuck = Pattern.compile("\u009b");

    private static final Pattern LEFT_PAREN_PATTERN = Pattern.compile("\\(");
    private static final Pattern RIGHT_PAREN_PATTERN = Pattern.compile("\\)");

    // Dash patterns for the ASCII style; compiled once instead of on every call.
    // Hyphen-like: hyphen-minus, underscore, U+058A, U+2010, U+2011, U+2012.
    private static final Pattern ASCII_HYPHEN_PATTERN = Pattern.compile("[-_\u058A\u2010\u2011\u2012]");
    // Dash-like: CP1252 en/em dash, U+2013, U+2014, U+2015, and SGML dash entities.
    private static final Pattern ASCII_DASH_PATTERN =
            Pattern.compile("[\u0096\u0097\u2013\u2014\u2015]|&MD;|&[mn]dash;");

    /** Output styles accepted by {@link #handleDashes(String, DashesEnum)}. */
    public enum DashesEnum {
        UNICODE,
        ASCII,
        NOT_CP1252,
        ORIGINAL
    }

    /** Output styles accepted by {@link #handleEllipsis(String, EllipsesEnum)}. */
    public enum EllipsesEnum {
        UNICODE,
        ASCII,
        NOT_CP1252,
        ORIGINAL
    }

    /** Output styles accepted by {@link #handleQuotes(String, boolean, QuotesEnum)}. */
    public enum QuotesEnum {
        UNICODE,
        LATEX,
        ASCII,
        NOT_CP1252,
        ORIGINAL
    }

    /** Not instantiable: the class only exposes static methods. */
    private LexerUtils() {
    }

    /**
     * Spells out the precomposed fraction characters for 1/4, 1/2, 3/4, 1/3 and 2/3.
     *
     * @param z   when {@code false} the input is returned unchanged
     * @param z2  when {@code true} the slash is written with a preceding backslash
     *            (for example {@code 1\/2}); otherwise a plain slash is used
     * @param str the text to rewrite
     * @return the rewritten text, or {@code str} itself when {@code z} is {@code false}
     */
    public static String normalizeFractions(boolean z, boolean z2, String str) {
        if (!z) {
            return str;
        }
        // In a regex replacement string a literal backslash must itself be escaped.
        String slash = z2 ? "\\\\/" : "/";
        String out = ONE_FOURTH_PATTERN.matcher(str).replaceAll("1" + slash + "4");
        out = ONE_HALF_PATTERN.matcher(out).replaceAll("1" + slash + "2");
        out = THREE_FOURTHS_PATTERN.matcher(out).replaceAll("3" + slash + "4");
        out = ONE_THIRD_PATTERN.matcher(out).replaceAll("1" + slash + "3");
        return TWO_THIRDS_PATTERN.matcher(out).replaceAll("2" + slash + "3");
    }

    /**
     * Rewrites currency signs: the cent sign becomes {@code cents}, the pound sign becomes
     * {@code #}, and the generic currency, euro and rupee signs (including the CP1252 euro
     * code point U+0080) become {@code $}.
     *
     * @param str the text to rewrite
     * @return the rewritten text
     */
    public static String normalizeCurrency(String str) {
        String out = CENTS_PATTERN.matcher(str).replaceAll("cents");
        out = POUND_PATTERN.matcher(out).replaceAll("#");
        out = GENERIC_CURRENCY_PATTERN.matcher(out).replaceAll("\\$");
        // Kept for parity with the original pipeline; U+0080 has already been consumed by
        // the generic pattern above, so this step finds nothing to replace.
        return CP1252_EURO_PATTERN.matcher(out).replaceAll("\u20AC");
    }

    /**
     * Replaces only the CP1252 euro code point U+0080 with the Unicode euro sign.
     *
     * @param str the text to rewrite
     * @return the rewritten text
     */
    public static String minimallyNormalizeCurrency(String str) {
        return CP1252_EURO_PATTERN.matcher(str).replaceAll("\u20AC");
    }

    /**
     * Deletes soft hyphens, zero-width (non-)joiners and word joiners.
     *
     * @param str the text to rewrite
     * @return the text without those characters, or {@code "-"} if nothing else remains
     */
    public static String removeSoftHyphens(String str) {
        String stripped = NON_WORD_REMOVE_CHARS.matcher(str).replaceAll("");
        return stripped.isEmpty() ? "-" : stripped;
    }

    /**
     * Maps a single mis-decoded CP1252 symbol to its Unicode character.
     *
     * @param str exactly one of U+0086, U+0087, U+0089, U+0095, U+0098 or U+0099
     * @return the dagger, double dagger, per-mille sign, bullet, small tilde or
     *         trade-mark sign respectively
     * @throws IllegalArgumentException if {@code str} is any other string
     */
    public static String processCp1252misc(String str) {
        switch (str) {
            case "\u0086":
                return "\u2020";
            case "\u0087":
                return "\u2021";
            case "\u0089":
                return "\u2030";
            case "\u0095":
                return "\u2022";
            case "\u0098":
                return "\u02DC";
            case "\u0099":
                return "\u2122";
            default:
                throw new IllegalArgumentException("Bad process cp1252");
        }
    }

    /**
     * Replaces every {@code &amp;} entity (matched case-insensitively) with {@code &}.
     *
     * @param str the text to rewrite
     * @return the rewritten text
     */
    public static String normalizeAmp(String str) {
        return AMP_PATTERN.matcher(str).replaceAll("&");
    }

    /**
     * Inserts a backslash before every occurrence of {@code c} that is not already
     * directly preceded by a backslash.
     *
     * @param str the text to escape
     * @param c   the character to escape
     * @return the escaped text; {@code str} itself when it does not contain {@code c}
     */
    public static String escapeChar(String str, char c) {
        int hit = str.indexOf(c);
        if (hit < 0) {
            return str;
        }
        StringBuilder escaped = new StringBuilder(str.length() + 8);
        int copiedUpTo = 0;
        while (hit >= 0) {
            if (hit == 0 || str.charAt(hit - 1) != '\\') {
                escaped.append(str, copiedUpTo, hit).append('\\');
                copiedUpTo = hit;
            }
            hit = str.indexOf(c, hit + 1);
        }
        return escaped.append(str, copiedUpTo, str.length()).toString();
    }

    /**
     * Converts every recognized single-quote variant to {@code '} and every recognized
     * double-quote variant to {@code "}.
     *
     * @param str the text to rewrite
     * @return the rewritten text
     */
    public static String asciiQuotes(String str) {
        String out = asciiSingleQuote.matcher(str).replaceAll("'");
        return asciiDoubleQuote.matcher(out).replaceAll("\"");
    }

    /**
     * Converts quotes to LaTeX-style backtick/apostrophe sequences. Non-directional quotes
     * become opening forms when {@code z} is true and closing forms otherwise; directional
     * quotes keep their own direction.
     */
    private static String latexQuotes(String str, boolean z) {
        String single = z ? "`" : "'";
        String dbl = z ? "``" : "''";
        String out = singleQuote.matcher(str).replaceAll(single);
        out = doubleQuote.matcher(out).replaceAll(dbl);
        out = leftSingleQuote.matcher(out).replaceAll("`");
        out = rightSingleQuote.matcher(out).replaceAll("'");
        out = leftDoubleQuote.matcher(out).replaceAll("``");
        return rightDoubleQuote.matcher(out).replaceAll("''");
    }

    /**
     * Converts quotes to Unicode directional quotes. Non-directional quotes become left
     * quotes when {@code z} is true and right quotes otherwise; CP1252 code points are
     * mapped to their Unicode counterparts.
     */
    private static String unicodeQuotes(String str, boolean z) {
        String single = z ? "\u2018" : "\u2019";
        String dbl = z ? "\u201C" : "\u201D";
        String out = singleQuote.matcher(str).replaceAll(single);
        out = doubleQuote.matcher(out).replaceAll(dbl);
        out = unicodeLeftSingleQuote.matcher(out).replaceAll("\u2018");
        out = unicodeRightSingleQuote.matcher(out).replaceAll("\u2019");
        out = unicodeLeftDoubleQuote.matcher(out).replaceAll("\u201C");
        out = unicodeRightDoubleQuote.matcher(out).replaceAll("\u201D");
        out = leftDuck.matcher(out).replaceAll("\u2039");
        return rightDuck.matcher(out).replaceAll("\u203A");
    }

    /** Maps only the CP1252 quote code points to Unicode; everything else is untouched. */
    private static String nonCp1252Quotes(String str) {
        return str.replace('\u008b', '\u2039')
                .replace('\u0091', '\u2018')
                .replace('\u0092', '\u2019')
                .replace('\u0093', '\u201C')
                .replace('\u0094', '\u201D')
                .replace('\u009b', '\u203A');
    }

    /**
     * Rewrites the quote characters in {@code str} according to {@code quotesEnum}.
     *
     * @param str        the text to rewrite
     * @param z          for the LATEX and UNICODE styles, whether non-directional quotes
     *                   are rendered as opening ({@code true}) or closing quotes
     * @param quotesEnum the target style; ORIGINAL returns {@code str} unchanged
     * @return the rewritten text
     */
    public static String handleQuotes(String str, boolean z, QuotesEnum quotesEnum) {
        switch (quotesEnum) {
            case LATEX:
                return latexQuotes(str, z);
            case UNICODE:
                return unicodeQuotes(str, z);
            case ASCII:
                return asciiQuotes(str);
            case NOT_CP1252:
                return nonCp1252Quotes(str);
            case ORIGINAL:
            default:
                return str;
        }
    }

    /**
     * Renders an ellipsis token in the requested style.
     *
     * @param str          the ellipsis token as it appeared in the text
     * @param ellipsesEnum UNICODE yields U+2026; ASCII yields three periods; NOT_CP1252
     *                     yields U+2026 for the CP1252 ellipsis U+0085; in every other
     *                     case the token is kept with its spaces replaced by U+00A0
     * @return the rendered token
     */
    public static String handleEllipsis(String str, EllipsesEnum ellipsesEnum) {
        switch (ellipsesEnum) {
            case UNICODE:
                return unicodeEllipsisStr;
            case ASCII:
                return ptb3EllipsisStr;
            case NOT_CP1252:
                if (str.equals("\u0085")) {
                    return unicodeEllipsisStr;
                }
                return SINGLE_SPACE_PATTERN.matcher(str).replaceAll(NON_BREAKING_SPACE);
            case ORIGINAL:
            default:
                return SINGLE_SPACE_PATTERN.matcher(str).replaceAll(NON_BREAKING_SPACE);
        }
    }

    /**
     * Renders a dash token in the requested style.
     *
     * @param str        the dash token as it appeared in the text
     * @param dashesEnum UNICODE yields an en dash for {@code -}, U+0096 or
     *                   {@code &ndash;} and an em dash for anything else; ASCII maps
     *                   hyphen-like characters to {@code -} and dash-like characters or
     *                   entities to {@code --}, collapsing an exact {@code ---} to
     *                   {@code --}; NOT_CP1252 maps only U+0096 and U+0097 to the
     *                   Unicode en and em dash; ORIGINAL returns {@code str} unchanged
     * @return the rendered token
     */
    public static String handleDashes(String str, DashesEnum dashesEnum) {
        switch (dashesEnum) {
            case UNICODE:
                boolean enDash = "-".equals(str) || "\u0096".equals(str) || "&ndash;".equals(str);
                return enDash ? "\u2013" : "\u2014";
            case ASCII:
                // Order matters: hyphens are normalized first, then dashes are widened.
                String ascii = ASCII_HYPHEN_PATTERN.matcher(str).replaceAll("-");
                ascii = ASCII_DASH_PATTERN.matcher(ascii).replaceAll("--");
                return "---".equals(ascii) ? "--" : ascii;
            case NOT_CP1252:
                if (str.equals("\u0096")) {
                    return "\u2013";
                }
                return str.equals("\u0097") ? "\u2014" : str;
            case ORIGINAL:
            default:
                return str;
        }
    }

    /**
     * Replaces round brackets with the Penn Treebank tokens {@code -LRB-} and
     * {@code -RRB-}.
     *
     * @param str the text to rewrite
     * @param z   when {@code false} the input is returned unchanged
     * @return the rewritten text
     */
    public static String pennNormalizeParens(String str, boolean z) {
        if (!z) {
            return str;
        }
        String out = LEFT_PAREN_PATTERN.matcher(str).replaceAll(PENN_LEFT_PAREN);
        return RIGHT_PAREN_PATTERN.matcher(out).replaceAll(PENN_RIGHT_PAREN);
    }
}
