package edu.stanford.nlp.process;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.eclipse.jgit.transport.WalkEncryption;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/process/PTBTokenizer.class */
public class PTBTokenizer<T extends HasWord> extends AbstractTokenizer<T> {
    private static final Redwood.RedwoodChannels log = Redwood.channels(PTBTokenizer.class);
    private final PTBLexer lexer;

    /* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/process/PTBTokenizer$PTBTokenizerFactory.class */
    /**
     * Factory for {@link PTBTokenizer} instances that share one {@link LexedTokenFactory} and one
     * option string.
     *
     * <p>The option string is stored as given and handed to the tokenizer constructor. The only
     * processing done here is joining base options and call-specific options with a comma.
     *
     * @param <T> type of token produced by the created tokenizers
     */
    public static class PTBTokenizerFactory<T extends HasWord> implements TokenizerFactory<T> {
        private static final long serialVersionUID = -8859638719818931606L;
        protected final LexedTokenFactory<T> factory;
        protected String options;

        /**
         * Creates a factory for {@link Word} tokens with the option string {@code invertible=false}.
         *
         * @return a new factory
         */
        public static TokenizerFactory<Word> newTokenizerFactory() {
            return newPTBTokenizerFactory(new WordTokenFactory(), "invertible=false");
        }

        /**
         * Creates a factory for {@link Word} tokens whose options are {@code invertible=false}
         * followed by the supplied options.
         *
         * @param str extra options; {@code null} or empty means "no extra options"
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String str) {
            // Guard against string concatenation turning a null argument into the text "null".
            String combined = (str == null || str.isEmpty()) ? "invertible=false" : "invertible=false," + str;
            return new PTBTokenizerFactory<>(new WordTokenFactory(), combined);
        }

        /**
         * Creates a factory for {@link CoreLabel} tokens with the given option string.
         *
         * @param str option string, stored unchanged
         * @return a new factory
         */
        public static PTBTokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory(String str) {
            return new PTBTokenizerFactory<>(new CoreLabelTokenFactory(), str);
        }

        /**
         * Creates a factory from an arbitrary token factory and option string.
         *
         * @param lexedTokenFactory token factory handed to every created tokenizer
         * @param str option string, stored unchanged
         * @param <T> token type
         * @return a new factory
         */
        public static <T extends HasWord> PTBTokenizerFactory<T> newPTBTokenizerFactory(LexedTokenFactory<T> lexedTokenFactory, String str) {
            return new PTBTokenizerFactory<>(lexedTokenFactory, str);
        }

        /**
         * Creates a factory for {@link CoreLabel} tokens configured by two switches. The option
         * string always starts with {@code ptb3Escaping=true}.
         *
         * @param z when true, {@code tokenizeNLs} is added to the options
         * @param z2 when true, {@code invertible} is added to the options
         * @return a new factory
         */
        public static PTBTokenizerFactory<CoreLabel> newPTBTokenizerFactory(boolean z, boolean z2) {
            return new PTBTokenizerFactory<>(z, z2, false, new CoreLabelTokenFactory());
        }

        /**
         * Builds the option string from three switches.
         *
         * @param z adds {@code tokenizeNLs}
         * @param z2 adds {@code invertible}
         * @param z3 selects {@code ptb3Escaping=false} instead of {@code ptb3Escaping=true}
         * @param lexedTokenFactory token factory handed to every created tokenizer
         */
        private PTBTokenizerFactory(boolean z, boolean z2, boolean z3, LexedTokenFactory<T> lexedTokenFactory) {
            this.factory = lexedTokenFactory;
            StringBuilder sb = new StringBuilder(z3 ? "ptb3Escaping=false" : "ptb3Escaping=true");
            if (z) {
                sb.append(",tokenizeNLs");
            }
            if (z2) {
                sb.append(",invertible");
            }
            this.options = sb.toString();
        }

        /** Stores the token factory and option string as given. */
        private PTBTokenizerFactory(LexedTokenFactory<T> lexedTokenFactory, String str) {
            this.factory = lexedTokenFactory;
            this.options = str;
        }

        /** Returns a tokenizer over {@code reader}; identical to {@link #getTokenizer(Reader)}. */
        @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
        public Iterator<T> getIterator(Reader reader) {
            return getTokenizer(reader);
        }

        /** Returns a tokenizer over {@code reader} using this factory's stored options. */
        @Override // edu.stanford.nlp.process.TokenizerFactory
        public Tokenizer<T> getTokenizer(Reader reader) {
            return new PTBTokenizer<>(reader, this.factory, this.options);
        }

        /**
         * Returns a tokenizer over {@code reader} using the stored options followed by
         * {@code str}.
         *
         * @param reader text source
         * @param str extra options for this tokenizer only; {@code null} or empty adds nothing
         * @return a new tokenizer
         */
        @Override // edu.stanford.nlp.process.TokenizerFactory
        public Tokenizer<T> getTokenizer(Reader reader, String str) {
            if (this.options == null || this.options.isEmpty()) {
                return new PTBTokenizer<>(reader, this.factory, str);
            }
            if (str == null || str.isEmpty()) {
                // Nothing to append: avoid producing "opts,null" or a dangling comma.
                return new PTBTokenizer<>(reader, this.factory, this.options);
            }
            return new PTBTokenizer<>(reader, this.factory, this.options + ',' + str);
        }

        /** Replaces the stored option string. */
        @Override // edu.stanford.nlp.process.TokenizerFactory
        public void setOptions(String str) {
            this.options = str;
        }
    }

    /**
     * Creates a tokenizer over {@code reader} that produces {@link Word} tokens, using the
     * option string {@code invertible=false}.
     *
     * @param reader text source
     * @return a new tokenizer
     */
    public static PTBTokenizer<Word> newPTBTokenizer(Reader reader) {
        final LexedTokenFactory<Word> wordFactory = new WordTokenFactory();
        return new PTBTokenizer<>(reader, wordFactory, "invertible=false");
    }

    /**
     * Creates a tokenizer over {@code reader} that produces {@link CoreLabel} tokens. The third
     * switch of the private constructor is fixed to {@code false}, which selects
     * {@code ptb3Escaping=true}.
     *
     * @param reader text source
     * @param z when true, {@code tokenizeNLs} is added to the options
     * @param z2 when true, {@code invertible} is added to the options
     * @return a new tokenizer
     */
    public static PTBTokenizer<CoreLabel> newPTBTokenizer(Reader reader, boolean z, boolean z2) {
        final LexedTokenFactory<CoreLabel> labelFactory = new CoreLabelTokenFactory();
        final boolean ptb3EscapingOff = false;
        return new PTBTokenizer<>(reader, z, z2, ptb3EscapingOff, labelFactory);
    }

    /**
     * Creates a tokenizer whose option string is assembled from three switches.
     *
     * @param reader text source
     * @param z adds {@code tokenizeNLs} to the options
     * @param z2 adds {@code invertible} to the options
     * @param z3 selects {@code ptb3Escaping=false} instead of {@code ptb3Escaping=true}
     * @param lexedTokenFactory factory the lexer uses to build tokens
     */
    private PTBTokenizer(Reader reader, boolean z, boolean z2, boolean z3, LexedTokenFactory<T> lexedTokenFactory) {
        this.lexer = new PTBLexer(reader, lexedTokenFactory, switchesToOptions(z, z2, z3));
    }

    /** Renders the three constructor switches as a comma-separated option string. */
    private static String switchesToOptions(boolean tokenizeNLs, boolean invertible, boolean ptb3EscapingOff) {
        String rendered = ptb3EscapingOff ? "ptb3Escaping=false" : "ptb3Escaping=true";
        if (tokenizeNLs) {
            rendered += ",tokenizeNLs";
        }
        if (invertible) {
            rendered += ",invertible";
        }
        return rendered;
    }

    /**
     * Creates a tokenizer by constructing a {@code PTBLexer} from the three arguments, which are
     * passed through unchanged.
     *
     * @param reader text source
     * @param lexedTokenFactory factory the lexer uses to build tokens
     * @param str option string for the lexer
     */
    public PTBTokenizer(Reader reader, LexedTokenFactory<T> lexedTokenFactory, String str) {
        lexer = new PTBLexer(reader, lexedTokenFactory, str);
    }

    /**
     * Fetches the next token from the underlying lexer.
     *
     * <p>The decompiler reported that this method's access modifier was changed from
     * {@code protected}.
     *
     * @return whatever the lexer's {@code next()} returns, cast to the token type
     * @throws RuntimeIOException wrapping any {@link IOException} raised by the lexer
     */
    @Override // edu.stanford.nlp.process.AbstractTokenizer
    @SuppressWarnings("unchecked")
    public T getNext() {
        final Object token;
        try {
            token = this.lexer.next();
        } catch (IOException ioFailure) {
            throw new RuntimeIOException(ioFailure);
        }
        return (T) token;
    }

    /**
     * Returns the literal {@code *NL*}.
     *
     * <p>Within this class, {@code tokReader} treats a token with exactly this text as a line
     * break when line preservation is enabled.
     *
     * @return the string {@code *NL*}
     */
    public static String getNewlineToken() {
        return "*NL*";
    }

    /**
     * Runs {@code str} through a {@code PTB2TextLexer} and concatenates every piece the lexer
     * returns, in order.
     *
     * @param str text to convert; must not be {@code null}
     * @return the concatenated lexer output
     * @throws RuntimeIOException wrapping any {@link IOException} raised by the lexer
     */
    public static String ptb2Text(String str) {
        final StringBuilder text = new StringBuilder(str.length());
        final PTB2TextLexer detokenizer = new PTB2TextLexer(new StringReader(str));
        try {
            // The lexer signals end of input by returning null.
            for (String piece = detokenizer.next(); piece != null; piece = detokenizer.next()) {
                text.append(piece);
            }
        } catch (IOException ioFailure) {
            throw new RuntimeIOException(ioFailure);
        }
        return text.toString();
    }

    /**
     * Converts a single token by surrounding it with one space on each side, passing the result
     * to {@link #ptb2Text(String)}, and trimming the output.
     *
     * @param str token text
     * @return the trimmed conversion result
     */
    public static String ptbToken2Text(String str) {
        final String padded = " " + str + " ";
        final String converted = ptb2Text(padded);
        return converted.trim();
    }

    /**
     * Streams {@code reader} through a {@code PTB2TextLexer}, writing each piece the lexer
     * returns to {@code writer}. Neither stream is closed or flushed here.
     *
     * @param reader text source
     * @param writer destination for the converted text
     * @return the number of pieces written
     * @throws IOException if the lexer or the writer fails
     */
    public static int ptb2Text(Reader reader, Writer writer) throws IOException {
        final PTB2TextLexer detokenizer = new PTB2TextLexer(reader);
        int pieces = 0;
        // The lexer signals end of input by returning null.
        for (String piece = detokenizer.next(); piece != null; piece = detokenizer.next()) {
            pieces++;
            writer.write(piece);
        }
        return pieces;
    }

    /**
     * Converts tokenized input back to text with {@link #ptb2Text(Reader, Writer)} and reports
     * throughput on standard error.
     *
     * <ul>
     *   <li>No input paths: standard input is converted to standard output.</li>
     *   <li>Input paths but no output list: every input is converted, in order, to one shared
     *       standard-output writer. The writer is closed once at the end, so output for the
     *       second and later files is not lost to an already-closed stream.</li>
     *   <li>Input and output lists: input {@code i} is converted to output {@code i}.</li>
     * </ul>
     *
     * @param list input locations, each opened with {@code IOUtils.readerFromString}
     * @param list2 output file paths parallel to {@code list}, or {@code null} for standard output
     * @param str character encoding name used for all readers and writers
     * @throws IOException if any stream cannot be opened, read or written
     */
    private static void untok(List<String> list, List<String> list2, String str) throws IOException {
        final long startNanos = System.nanoTime();
        int pieces = 0;
        if (list.isEmpty()) {
            // The reader over System.in is deliberately left open, as before.
            Reader stdin = new InputStreamReader(System.in, str);
            try (Writer stdout = new BufferedWriter(new OutputStreamWriter(System.out, str))) {
                pieces = ptb2Text(stdin, stdout);
            }
        } else if (list2 == null) {
            try (Writer stdout = new BufferedWriter(new OutputStreamWriter(System.out, str))) {
                for (String inputPath : list) {
                    try (Reader in = IOUtils.readerFromString(inputPath, str)) {
                        pieces += ptb2Text(in, stdout);
                    }
                }
            }
        } else {
            for (int idx = 0; idx < list.size(); idx++) {
                // Resources close in reverse order: writer (flushing), file stream, then reader.
                try (Reader in = IOUtils.readerFromString(list.get(idx), str);
                     FileOutputStream fileOut = new FileOutputStream(list2.get(idx));
                     Writer out = new BufferedWriter(new OutputStreamWriter(fileOut, str))) {
                    pieces += ptb2Text(in, out);
                }
            }
        }
        final double seconds = (System.nanoTime() - startNanos) / 1.0E9d;
        System.err.printf("PTBTokenizer untokenized %d tokens at %.2f tokens per second.%n", pieces, pieces / seconds);
    }

    /**
     * Joins the given strings with {@code StringUtils.join} and converts the result with
     * {@link #ptb2Text(String)}.
     *
     * @param list token strings
     * @return the conversion result
     */
    public static String ptb2Text(List<String> list) {
        final String joined = StringUtils.join(list);
        return ptb2Text(joined);
    }

    /**
     * Collects {@code word()} from every element and converts the resulting list with
     * {@link #ptb2Text(List)}.
     *
     * @param list labels whose words are converted
     * @return the conversion result
     */
    public static String labelList2Text(List<? extends HasWord> list) {
        final List<String> words = new ArrayList<>();
        for (HasWord label : list) {
            words.add(label.word());
        }
        return ptb2Text(words);
    }

    /**
     * Tokenizes input with {@code tokReader} and reports throughput on standard error.
     *
     * <ul>
     *   <li>No input paths: standard input is tokenized to standard output.</li>
     *   <li>Input paths but no output list: every input is tokenized to one shared
     *       standard-output writer, closed once at the end.</li>
     *   <li>Input and output lists: input {@code i} is tokenized to output file {@code i}.</li>
     * </ul>
     *
     * <p>Writers are closed with {@code IOUtils.closeIgnoringExceptions}, and readers and writers
     * are released even when tokenization throws.
     *
     * @param list input locations, each opened with {@code IOUtils.readerFromString}
     * @param list2 output file paths parallel to {@code list}, or {@code null} for standard output
     * @param str character encoding name used for all readers and writers
     * @param pattern passed through to {@code tokReader} (tag pattern, may be {@code null})
     * @param pattern2 passed through to {@code tokReader} (filter pattern, may be {@code null})
     * @param str2 tokenizer option string, passed through to {@code tokReader}
     * @param z passed through to {@code tokReader} (preserve lines)
     * @param z2 passed through to {@code tokReader} (one line per element)
     * @param z3 passed through to {@code tokReader} (dump)
     * @param z4 passed through to {@code tokReader} (lower-case)
     * @throws IOException if any stream cannot be opened, read or written
     */
    private static void tok(List<String> list, List<String> list2, String str, Pattern pattern, Pattern pattern2, String str2, boolean z, boolean z2, boolean z3, boolean z4) throws IOException {
        final long startNanos = System.nanoTime();
        long tokenCount = 0;
        if (list.isEmpty()) {
            Reader stdin = IOUtils.readerFromStdin(str);
            // The writer must exist before tokenizing; it is the sink tokReader writes to.
            BufferedWriter stdout = new BufferedWriter(new OutputStreamWriter(System.out, str));
            try {
                tokenCount += tokReader(stdin, stdout, pattern, pattern2, str2, z, z2, z3, z4);
            } finally {
                IOUtils.closeIgnoringExceptions(stdout);
            }
        } else if (list2 == null) {
            BufferedWriter stdout = new BufferedWriter(new OutputStreamWriter(System.out, str));
            try {
                for (String inputPath : list) {
                    try (Reader in = IOUtils.readerFromString(inputPath, str)) {
                        tokenCount += tokReader(in, stdout, pattern, pattern2, str2, z, z2, z3, z4);
                    }
                }
            } finally {
                IOUtils.closeIgnoringExceptions(stdout);
            }
        } else {
            for (int idx = 0; idx < list.size(); idx++) {
                try (Reader in = IOUtils.readerFromString(list.get(idx), str)) {
                    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(list2.get(idx)), str));
                    try {
                        tokenCount += tokReader(in, out, pattern, pattern2, str2, z, z2, z3, z4);
                    } finally {
                        IOUtils.closeIgnoringExceptions(out);
                    }
                }
            }
        }
        final double seconds = (System.nanoTime() - startNanos) / 1.0E9d;
        System.err.printf("PTBTokenizer tokenized %d tokens at %.2f tokens per second.%n", tokenCount, tokenCount / seconds);
    }

    /**
     * Tokenizes {@code reader} into {@link CoreLabel}s and writes the selected tokens to
     * {@code bufferedWriter}.
     *
     * <p>Output layout: with {@code z}, tokens are space-separated and a {@code *NL*} token
     * becomes a line break; otherwise with {@code z2}, tokens are space-separated; otherwise each
     * token is written on its own line.
     *
     * <p>When {@code pattern} is given, printing starts switched off. A token matching it turns
     * printing on if capture group 1 is empty and off otherwise; such tokens are never written.
     * Pattern matching always uses the token's text as read, before any lower-casing.
     *
     * @param reader text source
     * @param bufferedWriter destination; not flushed or closed here
     * @param pattern tag pattern controlling printing, or {@code null} to print everything
     * @param pattern2 tokens whose text fully matches are not written; may be {@code null}
     * @param str tokenizer option string
     * @param z preserve lines
     * @param z2 one line per element: emit a line break when printing is switched off
     * @param z3 write {@code toShorterString()} of the token instead of its text
     * @param z4 lower-case the token text (English locale) and store it back on the token
     * @return number of tokens read, including tokens that were not written
     * @throws IOException if writing fails
     */
    private static int tokReader(Reader reader, BufferedWriter bufferedWriter, Pattern pattern, Pattern pattern2, String str, boolean z, boolean z2, boolean z3, boolean z4) throws IOException {
        final Matcher tagMatcher = (pattern == null) ? null : pattern.matcher("");
        boolean printing = (pattern == null);
        boolean atLineStart = true;
        int tokenCount = 0;

        final PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<>(reader, new CoreLabelTokenFactory(), str);
        while (tokenizer.hasNext()) {
            final CoreLabel token = tokenizer.next();
            tokenCount++;

            final String original = token.get(CoreAnnotations.TextAnnotation.class);
            String rendered = original;
            if (z4) {
                rendered = original.toLowerCase(Locale.ENGLISH);
                token.set(CoreAnnotations.TextAnnotation.class, rendered);
            }

            if (tagMatcher != null && tagMatcher.reset(original).matches()) {
                // An empty group 1 turns printing on; anything else turns it off.
                printing = tagMatcher.group(1).isEmpty();
                if (!printing) {
                    atLineStart = true;
                    if (z2) {
                        bufferedWriter.newLine();
                    }
                }
                continue;
            }
            if (!printing) {
                continue;
            }

            if (z3) {
                rendered = token.toShorterString();
            }
            if (pattern2 != null && pattern2.matcher(original).matches()) {
                continue;
            }

            if (z && "*NL*".equals(original)) {
                atLineStart = true;
                bufferedWriter.newLine();
            } else if (z || z2) {
                // Space-separated output: no separator before the first token on a line.
                if (atLineStart) {
                    atLineStart = false;
                } else {
                    bufferedWriter.write(' ');
                }
                bufferedWriter.write(rendered);
            } else {
                bufferedWriter.write(rendered);
                bufferedWriter.newLine();
            }
        }
        return tokenCount;
    }

    /**
     * Returns the factory created by {@code PTBTokenizerFactory.newTokenizerFactory()}.
     *
     * @return a new {@link Word} tokenizer factory
     */
    public static TokenizerFactory<Word> factory() {
        final TokenizerFactory<Word> wordFactory = PTBTokenizerFactory.newTokenizerFactory();
        return wordFactory;
    }

    /**
     * Returns the factory created by
     * {@code PTBTokenizerFactory.newPTBTokenizerFactory(boolean, boolean)}.
     *
     * @param z first switch, forwarded unchanged (adds {@code tokenizeNLs} there)
     * @param z2 second switch, forwarded unchanged (adds {@code invertible} there)
     * @return a new {@link CoreLabel} tokenizer factory
     */
    public static TokenizerFactory<CoreLabel> factory(boolean z, boolean z2) {
        final TokenizerFactory<CoreLabel> labelFactory = PTBTokenizerFactory.newPTBTokenizerFactory(z, z2);
        return labelFactory;
    }

    /**
     * Returns a {@link CoreLabel} tokenizer factory with an empty option string.
     *
     * @return a new factory
     */
    public static TokenizerFactory<CoreLabel> coreLabelFactory() {
        final String noOptions = "";
        return coreLabelFactory(noOptions);
    }

    /**
     * Returns a {@link CoreLabel} tokenizer factory with the given option string.
     *
     * @param str option string, forwarded unchanged
     * @return a new factory
     */
    public static TokenizerFactory<CoreLabel> coreLabelFactory(String str) {
        final LexedTokenFactory<CoreLabel> labelTokens = new CoreLabelTokenFactory();
        return PTBTokenizerFactory.newPTBTokenizerFactory(labelTokens, str);
    }

    /**
     * Returns a tokenizer factory built from an arbitrary token factory and option string.
     *
     * @param lexedTokenFactory token factory handed to every created tokenizer
     * @param str option string, stored unchanged
     * @param <T> token type
     * @return a new factory
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> lexedTokenFactory, String str) {
        final PTBTokenizerFactory<T> created = new PTBTokenizerFactory<>(lexedTokenFactory, str);
        return created;
    }

    /**
     * Builds the table of command-line flags understood by {@link #main(String[])}, mapping each
     * flag name to the number of arguments that follow it.
     *
     * <p>The {@code dump} key is spelled out as a literal, matching the {@code -dump} flag in the
     * usage text, rather than borrowed from an unrelated library constant of the same value.
     *
     * @return a new map from flag name to argument count
     */
    private static Map<String, Integer> optionArgDefs() {
        Map<String, Integer> argCounts = Generics.newHashMap();
        // Flags that take one value.
        argCounts.put("options", 1);
        argCounts.put("encoding", 1);
        argCounts.put("parseInside", 1);
        argCounts.put("filter", 1);
        // Boolean flags.
        argCounts.put("ioFileList", 0);
        argCounts.put("fileList", 0);
        argCounts.put("lowerCase", 0);
        argCounts.put("dump", 0);
        argCounts.put("untok", 0);
        argCounts.put("preserveLines", 0);
        argCounts.put("oneLinePerElement", 0);
        return argCounts;
    }

    /**
     * Command-line entry point: tokenizes (or, with {@code -untok}, de-tokenizes) the named
     * files, or standard input when no file is named.
     *
     * <p>With {@code -fileList} or {@code -ioFileList}, each positional argument is a list file.
     * Every non-blank line names an input in its first whitespace-separated field and,
     * optionally, an output in its second; a missing output defaults to the input name plus
     * {@code .tok}. With {@code -fileList} the outputs are then discarded so that results go to
     * standard output.
     *
     * <p>An invalid {@code -parseInside} or {@code -filter} regex is reported through the logger
     * and that option is then ignored.
     *
     * @param strArr command-line arguments
     * @throws IOException if a list file or any input or output cannot be read or written
     */
    public static void main(String[] strArr) throws IOException {
        final Properties props = StringUtils.argsToProperties(strArr, optionArgDefs());
        if (PropertiesUtils.getBool(props, "h", PropertiesUtils.getBool(props, "help", false))) {
            log.info("Usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename*");
            log.info("  options: -h|-help|-options tokenizerOptions|-encoding encoding|-dump|");
            log.info("           -lowerCase|-preserveLines|-oneLinePerElement|-filter regex|");
            log.info("           -parseInside regex|-fileList|-ioFileList|-untok");
            return;
        }

        final StringBuilder tokenizerOptions = new StringBuilder();
        final String userOptions = props.getProperty("options", null);
        if (userOptions != null) {
            tokenizerOptions.append(userOptions);
        }
        final boolean preserveLines = PropertiesUtils.getBool(props, "preserveLines", false);
        if (preserveLines) {
            tokenizerOptions.append(",tokenizeNLs");
        }
        final boolean oneLinePerElement = PropertiesUtils.getBool(props, "oneLinePerElement", false);
        final boolean ioFileList = PropertiesUtils.getBool(props, "ioFileList", false);
        final boolean fileList = PropertiesUtils.getBool(props, "fileList", false);
        final boolean lowerCase = PropertiesUtils.getBool(props, "lowerCase", false);
        final boolean dump = PropertiesUtils.getBool(props, "dump", false);
        final boolean untokenize = PropertiesUtils.getBool(props, "untok", false);
        final String encoding = props.getProperty("encoding", "utf-8");

        // The element name may be followed by whitespace or a no-break space (U+00A0) and attributes.
        final String parseInside = props.getProperty("parseInside", null);
        final Pattern parseInsidePattern = parseInside == null
                ? null
                : compileOrNull("<(/?)(?:" + parseInside + ")(?:(?:\\s|\u00A0)[^>]*?)?>", "parseInside");
        final String filter = props.getProperty("filter", null);
        final Pattern filterPattern = filter == null ? null : compileOrNull(filter, "filter");

        // Positional arguments are stored under the empty key.
        final String positional = props.getProperty("", null);
        final String[] fileArgs = positional == null ? null : positional.split("\\s+");

        final List<String> inputFiles = new ArrayList<>();
        List<String> outputFiles = null;
        if (fileArgs != null) {
            if (fileList || ioFileList) {
                outputFiles = new ArrayList<>();
                for (String listFile : fileArgs) {
                    try (BufferedReader listReader = IOUtils.readerFromString(listFile, encoding)) {
                        for (String line = listReader.readLine(); line != null; line = listReader.readLine()) {
                            final String trimmed = line.trim();
                            if (trimmed.isEmpty()) {
                                // A blank line would otherwise yield an empty file name.
                                continue;
                            }
                            final String[] fields = trimmed.split("\\s+");
                            inputFiles.add(fields[0]);
                            outputFiles.add(fields.length > 1 ? fields[1] : fields[0] + ".tok");
                        }
                    }
                }
                if (fileList) {
                    outputFiles = null;
                }
            } else {
                inputFiles.addAll(Arrays.asList(fileArgs));
            }
        }

        if (untokenize) {
            untok(inputFiles, outputFiles, encoding);
        } else {
            tok(inputFiles, outputFiles, encoding, parseInsidePattern, filterPattern, tokenizerOptions.toString(), preserveLines, oneLinePerElement, dump, lowerCase);
        }
    }

    /** Compiles {@code regex}, or logs a warning naming the option and returns {@code null} if it is invalid. */
    private static Pattern compileOrNull(String regex, String optionName) {
        try {
            return Pattern.compile(regex);
        } catch (PatternSyntaxException badRegex) {
            log.warn("Ignoring -" + optionName + ": invalid regular expression: " + badRegex.getMessage());
            return null;
        }
    }
}
