package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.function.Predicate;
import org.apache.batik.svggen.font.SVGFont;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/trees/Treebanks.class */
/**
 * Command-line utility for loading a treebank from disk and printing it, or statistics about
 * it, in various forms. All functionality is reached through {@link #main(String[])}; the
 * class cannot be instantiated.
 */
public class Treebanks {
    private static final Redwood.RedwoodChannels log = Redwood.channels(Treebanks.class);

    private Treebanks() {
    }

    /** Logs a short description of the command line accepted by {@link #main(String[])}. */
    private static void printUsage() {
        log.info("This main method will let you variously manipulate and view a treebank.");
        log.info("Usage: java Treebanks [-flags]* treebankPath [fileRanges]");
        log.info("Useful flags include:");
        log.info("\t-maxLength n\t-suffix ext\t-treeReaderFactory class");
        log.info("\t-pennPrint\t-encoding enc\t-tlp class\t-sentenceLengths");
        log.info("\t-summary\t-decimate\t-yield\t-correct\t-punct");
        log.info("\t-oneLine\t-words\t-taggedWords\t-annotate options");
        log.info("\t-minLength n\t-normalized\t-timing\t-countTaggings\t-removeCodeTrees");
        log.info("\t-filter <class>: class implements Predicate<Tree>, this filters trees which return false");
    }

    /**
     * Tells whether a command-line flag must be followed by a value.
     *
     * @param flag the flag as typed on the command line, including the leading dash
     * @return {@code true} if the argument parser reads the following argument as the flag's value
     */
    private static boolean requiresValue(String flag) {
        return flag.equals("-maxLength")
                || flag.equals("-minLength")
                || flag.equalsIgnoreCase("-tlp")
                || flag.equals("-treeReaderFactory")
                || flag.equals("-trf")
                || flag.equals("-suffix")
                || flag.equals("-decimate")
                || flag.equals("-encoding")
                || flag.equals("-annotate")
                || flag.equals("-filter");
    }

    /** Returns {@code true} when {@code length} lies in the inclusive range [min, max]. */
    private static boolean lengthInRange(int length, int min, int max) {
        return length >= min && length <= max;
    }

    /** Opens a buffered character writer on {@code path} using the given character encoding. */
    private static BufferedWriter newFileWriter(String path, String encoding) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), encoding));
    }

    /**
     * Parses the flags, loads the treebank from the remaining path arguments (each optionally
     * followed by a file-number range) and then runs every action that was requested.
     *
     * <p>The {@code -minLength}/{@code -maxLength} limits are applied by {@code -pennPrint},
     * {@code -oneLine}, {@code -words} and {@code -yield}; {@code -taggedWords} prints every
     * tree. With {@code -normalized} the treebank is built with the no-argument
     * {@code DiskTreebank} constructor, so the reader factory and encoding options are not
     * passed to it.
     *
     * @param args command-line arguments: {@code [-flags]* treebankPath [fileRanges] ...}
     * @throws IOException if output cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            printUsage();
            return;
        }
        int argIndex = 0;
        int maxLength = Integer.MAX_VALUE;
        int minLength = -1;
        boolean normalized = false;
        boolean decimate = false;
        boolean pennPrint = false;
        boolean oneLine = false;
        boolean printTaggedWords = false;
        boolean printWords = false;
        boolean correct = false;
        boolean summary = false;
        boolean timing = false;
        boolean yield = false;
        boolean punct = false;
        boolean sentenceLengths = false;
        boolean countTaggings = false;
        boolean removeCodeTrees = false;
        String annotationOptions = null;
        String decimatePrefix = null;
        String encoding = "UTF-8";
        String suffix = Treebank.DEFAULT_TREE_FILE_SUFFIX;
        TreeReaderFactory treeReaderFactory = null;
        TreebankLanguagePack treebankLanguagePack = null;
        ArrayList<Predicate<Tree>> filters = new ArrayList<>();

        while (argIndex < args.length && args[argIndex].startsWith("-")) {
            final String flag = args[argIndex];
            // Reject a value-taking flag that is the last argument instead of indexing past the array.
            if (requiresValue(flag) && argIndex + 1 >= args.length) {
                log.info("Missing argument for option: " + flag);
                printUsage();
                return;
            }
            if (flag.equals("-maxLength")) {
                maxLength = Integer.parseInt(args[argIndex + 1]);
                argIndex += 2;
            } else if (flag.equals("-minLength")) {
                minLength = Integer.parseInt(args[argIndex + 1]);
                argIndex += 2;
            } else if (flag.equals("-h") || flag.equals("-help")) {
                printUsage();
                argIndex++;
            } else if (flag.equals("-normalized")) {
                normalized = true;
                argIndex++;
            } else if (flag.equalsIgnoreCase("-tlp")) {
                try {
                    treebankLanguagePack = (TreebankLanguagePack) Class.forName(args[argIndex + 1])
                            .getDeclaredConstructor().newInstance();
                    treeReaderFactory = treebankLanguagePack.treeReaderFactory();
                    argIndex += 2;
                } catch (ReflectiveOperationException | RuntimeException e) {
                    log.info("Couldn't instantiate as TreebankLanguagePack: " + args[argIndex + 1] + " (" + e + ')');
                    return;
                }
            } else if (flag.equals("-treeReaderFactory") || flag.equals("-trf")) {
                try {
                    treeReaderFactory = (TreeReaderFactory) Class.forName(args[argIndex + 1])
                            .getDeclaredConstructor().newInstance();
                    argIndex += 2;
                } catch (ReflectiveOperationException | RuntimeException e) {
                    log.info("Couldn't instantiate as TreeReaderFactory: " + args[argIndex + 1] + " (" + e + ')');
                    return;
                }
            } else if (flag.equals("-suffix")) {
                suffix = args[argIndex + 1];
                argIndex += 2;
            } else if (flag.equals("-decimate")) {
                decimate = true;
                decimatePrefix = args[argIndex + 1];
                argIndex += 2;
            } else if (flag.equals("-encoding")) {
                encoding = args[argIndex + 1];
                argIndex += 2;
            } else if (flag.equals("-correct")) {
                correct = true;
                argIndex++;
            } else if (flag.equals("-summary")) {
                summary = true;
                argIndex++;
            } else if (flag.equals("-yield")) {
                yield = true;
                argIndex++;
            } else if (flag.equals("-punct")) {
                punct = true;
                argIndex++;
            } else if (flag.equals("-pennPrint")) {
                pennPrint = true;
                argIndex++;
            } else if (flag.equals("-oneLine")) {
                oneLine = true;
                argIndex++;
            } else if (flag.equals("-taggedWords")) {
                printTaggedWords = true;
                argIndex++;
            } else if (flag.equals("-words")) {
                printWords = true;
                argIndex++;
            } else if (flag.equals("-annotate")) {
                annotationOptions = args[argIndex + 1];
                argIndex += 2;
            } else if (flag.equals("-timing")) {
                timing = true;
                argIndex++;
            } else if (flag.equals("-countTaggings")) {
                countTaggings = true;
                argIndex++;
            } else if (flag.equals("-sentenceLengths")) {
                sentenceLengths = true;
                argIndex++;
            } else if (flag.equals("-removeCodeTrees")) {
                removeCodeTrees = true;
                argIndex++;
            } else if (flag.equals("-filter")) {
                // The named class is trusted to implement Predicate<Tree>, as the usage text states.
                @SuppressWarnings("unchecked")
                Predicate<Tree> filter = (Predicate<Tree>) ReflectionLoading.loadByReflection(args[argIndex + 1]);
                filters.add(filter);
                argIndex += 2;
            } else {
                log.info("Unknown option: " + flag);
                argIndex++;
            }
        }

        // Effectively-final copies so the tree-visiting lambdas below can capture the limits.
        final int minLen = minLength;
        final int maxLen = maxLength;

        if (treeReaderFactory == null) {
            treeReaderFactory = reader -> new PennTreeReader(reader, new LabeledScoredTreeFactory());
        }
        Treebank treebank = normalized ? new DiskTreebank() : new DiskTreebank(treeReaderFactory, encoding);
        for (Predicate<Tree> filter : filters) {
            treebank = new FilteringTreebank(treebank, filter);
        }
        // Wraps System.out, so it is flushed but deliberately never closed.
        final PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);

        if (argIndex >= args.length) {
            printUsage();
            return;
        }

        // The loading loop consumes every remaining argument, so remember the first path (and
        // its range, if it had one) for the actions below that report on or rewrite that file.
        final int firstPathIndex = argIndex;
        final String firstPath = args[firstPathIndex];
        String firstRange = null;
        while (argIndex < args.length) {
            if (argIndex + 1 < args.length) {
                try {
                    treebank.loadPath(args[argIndex], new NumberRangesFileFilter(args[argIndex + 1], true));
                    if (argIndex == firstPathIndex) {
                        firstRange = args[argIndex + 1];
                    }
                    argIndex += 2;
                } catch (IllegalArgumentException e) {
                    // The next argument was not usable as a number range: treat it as another path.
                    treebank.loadPath(args[argIndex], suffix, true);
                    argIndex++;
                }
            } else {
                treebank.loadPath(args[argIndex], suffix, true);
                argIndex++;
            }
        }

        if (annotationOptions != null) {
            log.info("annotationOptions not yet implemented");
        }
        if (summary) {
            System.out.println(treebank.textualSummary());
        }
        if (sentenceLengths) {
            sentenceLengths(treebank, firstPath, firstRange, printWriter);
        }
        if (punct) {
            printPunct(treebank, treebankLanguagePack, printWriter);
        }
        if (correct) {
            treebank = new EnglishPTBTreebankCorrector().transformTrees(treebank);
        }
        if (pennPrint) {
            treebank.apply(tree -> {
                if (lengthInRange(tree.yield().size(), minLen, maxLen)) {
                    tree.pennPrint(printWriter);
                    printWriter.println();
                }
            });
        }
        if (oneLine) {
            treebank.apply(tree -> {
                if (lengthInRange(tree.yield().size(), minLen, maxLen)) {
                    printWriter.println(tree);
                }
            });
        }
        if (printWords) {
            final BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer();
            treebank.apply(tree -> {
                Tree normalizedTree = normalizer.normalizeWholeTree(tree, tree.treeFactory());
                if (lengthInRange(normalizedTree.yield().size(), minLen, maxLen)) {
                    printWriter.println(SentenceUtils.listToString(normalizedTree.taggedYield()));
                }
            });
        }
        if (printTaggedWords) {
            final BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer();
            treebank.apply(tree -> {
                Tree normalizedTree = normalizer.normalizeWholeTree(tree, tree.treeFactory());
                printWriter.println(SentenceUtils.listToString(normalizedTree.taggedYield(), false, "_"));
            });
        }
        if (countTaggings) {
            countTaggings(treebank, printWriter);
        }
        if (yield) {
            treebank.apply(tree -> {
                if (lengthInRange(tree.yield().size(), minLen, maxLen)) {
                    printWriter.println(SentenceUtils.listToString(tree.yield()));
                }
            });
        }
        if (decimate) {
            // Close the three output files even if decimation fails part-way through.
            try (BufferedWriter trainWriter = newFileWriter(decimatePrefix + "-train.txt", encoding);
                 BufferedWriter devWriter = newFileWriter(decimatePrefix + "-dev.txt", encoding);
                 BufferedWriter testWriter = newFileWriter(decimatePrefix + "-test.txt", encoding)) {
                treebank.decimate(trainWriter, devWriter, testWriter);
            }
        }
        if (timing) {
            runTiming(treebank);
        }
        if (removeCodeTrees) {
            if (new File(firstPath).isDirectory()) {
                throw new RuntimeException("-removeCodeTrees only works on a single file");
            }
            // NOTE(review): the file is read without naming an encoding but written back with
            // the -encoding value; confirm which charset slurpFile(String) uses.
            String cleaned = IOUtils.slurpFile(firstPath).replaceAll("\\( \\(CODE <[^>]+>\\)\\)", "");
            try (Writer fileWriter = new OutputStreamWriter(new FileOutputStream(firstPath), encoding)) {
                fileWriter.write(cleaned);
            }
        }
        printWriter.flush();
    }

    /**
     * Prints every tagged word whose tag is accepted by the language pack's punctuation-tag filter.
     *
     * @param treebank the trees to scan
     * @param treebankLanguagePack supplies the punctuation-tag filter; if {@code null} a message
     *     is logged and nothing is printed
     * @param printWriter destination, one tagged word per line
     */
    private static void printPunct(Treebank treebank, TreebankLanguagePack treebankLanguagePack, PrintWriter printWriter) {
        if (treebankLanguagePack == null) {
            log.info("The -punct option requires you to specify -tlp");
            return;
        }
        Predicate<String> isPunctuationTag = treebankLanguagePack.punctuationTagAcceptFilter();
        for (Tree tree : treebank) {
            for (TaggedWord taggedWord : tree.taggedYield()) {
                if (isPunctuationTag.test(taggedWord.tag())) {
                    printWriter.println(taggedWord);
                }
            }
        }
    }

    /**
     * Counts how often each word occurs with each tag and prints one line per word: the word,
     * a tab, then tab-separated {@code tag<TAB>count} pairs.
     *
     * @param treebank the trees whose tagged yields are counted
     * @param printWriter destination for the table
     */
    private static void countTaggings(Treebank treebank, PrintWriter printWriter) {
        final TwoDimensionalCounter<String, String> wordTagCounts = new TwoDimensionalCounter<>();
        treebank.apply(tree -> {
            for (TaggedWord taggedWord : tree.taggedYield()) {
                wordTagCounts.incrementCount(taggedWord.word(), taggedWord.tag());
            }
        });
        for (String word : wordTagCounts.firstKeySet()) {
            printWriter.print(word);
            printWriter.print('\t');
            ClassicCounter<String> tagCounts = wordTagCounts.getCounter(word);
            for (String tag : tagCounts.keySet()) {
                printWriter.print(tag + '\t' + tagCounts.getCount(tag) + '\t');
            }
            printWriter.println();
        }
    }

    /**
     * Times three operations on the treebank and logs the results: counting words through the
     * iterator, counting words through a visitor passed to {@code apply}, and calling
     * {@code size()}.
     *
     * @param treebank the treebank to traverse
     */
    private static void runTiming(Treebank treebank) {
        System.out.println();
        Timing.startTime();
        int iteratorWordCount = 0;
        for (Tree tree : treebank) {
            iteratorWordCount += tree.yield().size();
        }
        Timing.endTime("traversing corpus, counting words with iterator");
        log.info("There were " + iteratorWordCount + " words in the treebank.");

        // Single-element array so the visitor lambda can accumulate into it and the total can
        // be read back afterwards.
        final int[] visitorWordCount = {0};
        // Restart the timer so this measurement does not include the iterator traversal.
        Timing.startTime();
        treebank.apply(tree -> {
            visitorWordCount[0] += tree.yield().size();
        });
        log.info(new Object[0]); // argument-less log call, apparently used as a separator
        Timing.endTime("traversing corpus, counting words with TreeVisitor");
        log.info("There were " + visitorWordCount[0] + " words in the treebank.");

        log.info(new Object[0]);
        Timing.startTime();
        log.info("This treebank contains " + treebank.size() + " trees.");
        Timing.endTime("size of corpus");
    }

    /**
     * Computes the median sentence length from a histogram of lengths.
     *
     * @param lengthCounts {@code lengthCounts[n]} is the number of sentences of length {@code n}
     * @param maxTrackedLength largest length with its own bucket; buckets beyond it are ignored
     * @param numSentences total number of sentences, including any longer than
     *     {@code maxTrackedLength}
     * @return the median, or {@code 0.0} when there are no sentences or the median lies beyond
     *     {@code maxTrackedLength}
     */
    private static double medianLength(int[] lengthCounts, int maxTrackedLength, int numSentences) {
        int seenBefore = 0;
        for (int length = 0; length <= maxTrackedLength; length++) {
            int seen = seenBefore + lengthCounts[length];
            if (seen > numSentences / 2) {
                if (numSentences % 2 == 0 && seenBefore == numSentences / 2) {
                    // Even count with the two middle sentences in different buckets: average
                    // this length with the nearest shorter length that actually occurs.
                    int lowerLength = length - 1;
                    while (lowerLength > 0 && lengthCounts[lowerLength] == 0) {
                        lowerLength--;
                    }
                    return (length + lowerLength) / 2.0d;
                }
                return length;
            }
            seenBefore = seen;
        }
        return 0.0d;
    }

    /**
     * Prints a histogram of sentence lengths (0 to 150, plus one bucket for anything longer),
     * the average and median length, and the longest sentence.
     *
     * <p>The statistics go to {@code System.out}; only the longest tree itself is written to
     * {@code printWriter}.
     *
     * @param treebank the trees to measure
     * @param name path shown in the header line
     * @param range file range shown in the header line, or {@code null} to omit it
     * @param printWriter receives the string form of the longest tree
     */
    private static void sentenceLengths(Treebank treebank, String name, String range, PrintWriter printWriter) {
        final int maxTrackedLength = 150;
        final int overflowBucket = maxTrackedLength + 1;
        int[] lengthCounts = new int[maxTrackedLength + 2];
        int numSentences = 0;
        int longestLength = 0;
        int totalWords = 0;
        String longestSentence = "";
        DecimalFormat oneDecimal = new DecimalFormat("0.0");

        for (Tree tree : treebank) {
            numSentences++;
            int length = tree.yield().size();
            if (length <= maxTrackedLength) {
                lengthCounts[length]++;
            } else {
                lengthCounts[overflowBucket]++;
            }
            totalWords += length;
            if (length > longestLength) {
                longestLength = length;
                longestSentence = tree.toString();
            }
        }

        System.out.print("Files " + name + ' ');
        if (range != null) {
            System.out.print(range + ' ');
        }
        System.out.println("consists of " + numSentences + " sentences");

        int runningTotal = 0;
        for (int length = 0; length <= maxTrackedLength; length++) {
            runningTotal += lengthCounts[length];
            System.out.println("  " + lengthCounts[length] + " of length " + length
                    + " (running total: " + runningTotal + ')');
        }
        if (lengthCounts[overflowBucket] > 0) {
            System.out.println("  " + lengthCounts[overflowBucket] + " of length " + overflowBucket + " to "
                    + longestLength + " (running total: " + (runningTotal + lengthCounts[overflowBucket]) + ')');
        }

        // Floating-point division, guarded so an empty treebank reports 0.0 instead of failing.
        double average = numSentences == 0 ? 0.0d : (double) totalWords / numSentences;
        double median = medianLength(lengthCounts, maxTrackedLength, numSentences);
        System.out.println("Average length: " + oneDecimal.format(average)
                + "; median length: " + oneDecimal.format(median));
        System.out.println("Longest sentence is of length: " + longestLength);
        printWriter.println(longestSentence);
    }
}
