package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;
import edu.stanford.nlp.objectbank.LineIterator;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.eclipse.jgit.transport.WalkEncryption;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.5.6.jar:edu/stanford/nlp/international/arabic/process/ArabicDocumentReaderAndWriter.class */
/**
 * Reads line-oriented Arabic text into lists of {@link CoreLabel} and prints classifier answers.
 *
 * <p>Each input line is converted by {@code IOBUtils.StringToIOB} (the exact labelling scheme is
 * defined there, not in this class). Depending on the constructor flags a line may:
 * <ul>
 *   <li>start with a domain label, separated from the text by whitespace;</li>
 *   <li>consist of whitespace-separated tokens of the form {@code word|||tag}, where {@code word}
 *       may itself be {@code raw>>>rewritten}.</li>
 * </ul>
 */
public class ArabicDocumentReaderAndWriter implements DocumentReaderAndWriter<CoreLabel> {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicDocumentReaderAndWriter.class);
    private static final long serialVersionUID = 3667837672769424178L;

    /** Segment marker used when the caller says the input carries segmentation markers. */
    private static final Character DEFAULT_SEG_MARKER = '-';

    /** Separates a word from its tag in tagged input. */
    private static final String tagDelimiter = "|||";

    /** Separates the raw form of a word from its rewritten form in tagged input. */
    private static final String rewriteDelimiter = ">>>";

    // Compiled once; these are applied to every line / token that is read.
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern TAG_DELIMITER_PATTERN = Pattern.compile(Pattern.quote(tagDelimiter));
    private static final Pattern REWRITE_DELIMITER_PATTERN = Pattern.compile(Pattern.quote(rewriteDelimiter));

    private final IteratorFromReaderFactory<List<CoreLabel>> factory;
    private final TokenizerFactory<CoreLabel> tf;
    private final Character segMarker;
    private final boolean inputHasTags;
    private final boolean inputHasDomainLabels;
    private final String inputDomain;
    private final boolean shouldStripRewrites;

    /** Annotation key under which the rewritten form of a tagged input word is stored. */
    public static class RewrittenArabicAnnotation implements CoreAnnotation<String> {
        @Override // edu.stanford.nlp.ling.CoreAnnotation
        public Class<String> getType() {
            return String.class;
        }
    }

    /**
     * Creates a reader for untagged input without a tokenizer.
     *
     * @param hasSegMarkers whether the default segment marker ({@code '-'}) is passed to the
     *     IOB conversion; when {@code false} a {@code null} marker is passed
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers) {
        this(hasSegMarkers, null);
    }

    /**
     * Creates a reader for untagged input.
     *
     * @param hasSegMarkers see {@link #ArabicDocumentReaderAndWriter(boolean)}
     * @param tokenizerFactory tokenizer applied to the input, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, TokenizerFactory<CoreLabel> tokenizerFactory) {
        this(hasSegMarkers, false, tokenizerFactory);
    }

    /**
     * Creates a reader whose lines carry no domain label; every line gets the domain {@code "123"}.
     *
     * @param hasSegMarkers see {@link #ArabicDocumentReaderAndWriter(boolean)}
     * @param hasTags whether tokens have the form {@code word|||tag}
     * @param tokenizerFactory tokenizer applied to the input, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, boolean hasTags, TokenizerFactory<CoreLabel> tokenizerFactory) {
        this(hasSegMarkers, hasTags, false, "123", tokenizerFactory);
    }

    /**
     * Creates a reader that does not ask the IOB conversion to strip rewrites.
     *
     * @param hasSegMarkers see {@link #ArabicDocumentReaderAndWriter(boolean)}
     * @param hasTags whether tokens have the form {@code word|||tag}
     * @param hasDomainLabels whether each line starts with a domain label
     * @param domain domain assigned to every line when {@code hasDomainLabels} is {@code false}
     * @param tokenizerFactory tokenizer applied to the input, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, boolean hasTags, boolean hasDomainLabels, String domain, TokenizerFactory<CoreLabel> tokenizerFactory) {
        this(hasSegMarkers, hasTags, hasDomainLabels, domain, false, tokenizerFactory);
    }

    /**
     * Creates a fully configured reader.
     *
     * @param hasSegMarkers see {@link #ArabicDocumentReaderAndWriter(boolean)}
     * @param hasTags whether tokens have the form {@code word|||tag}
     * @param hasDomainLabels whether each line starts with a domain label
     * @param domain domain assigned to every line when {@code hasDomainLabels} is {@code false}
     * @param stripRewrites forwarded to {@code IOBUtils.StringToIOB} for tagged input
     * @param tokenizerFactory tokenizer applied to the input, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, boolean hasTags, boolean hasDomainLabels, String domain, boolean stripRewrites, TokenizerFactory<CoreLabel> tokenizerFactory) {
        this.tf = tokenizerFactory;
        this.inputHasTags = hasTags;
        this.inputHasDomainLabels = hasDomainLabels;
        this.inputDomain = domain;
        this.shouldStripRewrites = stripRewrites;
        this.segMarker = hasSegMarkers ? DEFAULT_SEG_MARKER : null;
        // Kept as an anonymous class that only reads the enclosing instance's fields (never the
        // constructor parameters), so its serialized form and class name stay as they were.
        this.factory = LineIterator.getFactory(new SerializableFunction<String, List<CoreLabel>>() {
            private static final long serialVersionUID = 5243251505653686497L;

            /** Converts one input line into its list of labels. */
            @Override // java.util.function.Function
            public List<CoreLabel> apply(String in) {
                String text = in;
                String lineDomain = "";
                if (inputHasDomainLabels) {
                    String[] domainAndText = WHITESPACE.split(in, 2);
                    if (domainAndText.length < 2) {
                        // Best effort: keep the whole line as text with an empty domain.
                        log.info("Missing domain label or text: ");
                        log.info(in);
                    } else {
                        lineDomain = domainAndText[0];
                        text = domainAndText[1];
                    }
                } else {
                    lineDomain = inputDomain;
                }

                List<CoreLabel> labels;
                if (inputHasTags) {
                    String[] taggedTokens = WHITESPACE.split(text);
                    List<CoreLabel> words = new ArrayList<>(taggedTokens.length);
                    for (String taggedToken : taggedTokens) {
                        words.add(toLabel(taggedToken, lineDomain));
                    }
                    labels = IOBUtils.StringToIOB(words, segMarker, true, shouldStripRewrites);
                } else if (tf == null) {
                    labels = IOBUtils.StringToIOB(text, segMarker);
                } else {
                    labels = IOBUtils.StringToIOB(segment(text), segMarker, false);
                }

                // For tagged input with domain labels the domain was already set on each word.
                if (!inputHasDomainLabels) {
                    IOBUtils.labelDomain(labels, inputDomain);
                } else if (!inputHasTags) {
                    IOBUtils.labelDomain(labels, lineDomain);
                }
                return labels;
            }

            /** Runs the configured tokenizer over {@code text}; only called when {@code tf} is set. */
            private List<CoreLabel> segment(String text) {
                return tf.getTokenizer(new StringReader(text)).tokenize();
            }

            /**
             * Builds the label for one {@code word|||tag} token, where {@code word} is either a
             * plain word or {@code raw>>>rewritten}.
             *
             * @throws IllegalArgumentException if the token has no tag part
             */
            private CoreLabel toLabel(String taggedToken, String lineDomain) {
                String[] wordAndTag = TAG_DELIMITER_PATTERN.split(taggedToken);
                assert wordAndTag.length == 2;
                if (wordAndTag.length < 2) {
                    // Without assertions this used to surface as a bare index error.
                    throw new IllegalArgumentException("Token is not of the form word" + tagDelimiter + "tag: " + taggedToken);
                }
                String[] forms = REWRITE_DELIMITER_PATTERN.split(wordAndTag[0]);
                assert forms.length == 1 || forms.length == 2;
                String raw = forms[0];
                String rewritten = forms.length == 2 ? forms[1] : raw;

                if (tf != null) {
                    List<CoreLabel> rawSegments = segment(raw);
                    List<CoreLabel> rewrittenSegments = segment(rewritten);
                    if (rewrittenSegments.size() != rawSegments.size()) {
                        System.err.printf("%s: Different number of tokens in raw and rewritten: %s>>>%s%n", getClass().getName(), raw, rewritten);
                        rewrittenSegments = rawSegments;
                    }
                    if (!rawSegments.isEmpty()) {
                        boolean markerFollows = false;
                        if (rawSegments.size() > 1) {
                            // Explicit null check: a null marker must never match a segment
                            // whose text happens to be "null".
                            markerFollows = segMarker != null && segMarker.toString().equals(rawSegments.get(1).value());
                            if (!markerFollows) {
                                System.err.printf("%s: Raw token generates multiple segments: %s%n", getClass().getName(), raw);
                            }
                        }
                        // Only the first segment is kept, plus the marker when it directly follows.
                        raw = rawSegments.get(0).value();
                        rewritten = rewrittenSegments.get(0).value();
                        if (markerFollows) {
                            raw = raw + segMarker;
                            rewritten = rewritten + segMarker;
                        }
                    }
                }

                CoreLabel label = new CoreLabel();
                label.setValue(raw);
                label.setWord(raw);
                label.setTag(wordAndTag[1]);
                label.set(CoreAnnotations.DomainAnnotation.class, lineDomain);
                label.set(RewrittenArabicAnnotation.class, rewritten);
                return label;
            }
        });
    }

    /** No configuration is read from the flags. */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void init(SeqClassifierFlags seqClassifierFlags) {
    }

    /** Returns an iterator that yields one label list per line of {@code reader}. */
    @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
    public Iterator<List<CoreLabel>> getIterator(Reader reader) {
        return this.factory.getIterator(reader);
    }

    /**
     * Prints a tab-separated table with a header row followed by one row per label holding its
     * answer, gold answer and character annotations.
     */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void printAnswers(List<CoreLabel> list, PrintWriter printWriter) {
        printWriter.println("Answer\tGoldAnswer\tCharacter");
        for (CoreLabel label : list) {
            printWriter.printf("%s\t%s\t%s%n",
                    label.get(CoreAnnotations.AnswerAnnotation.class),
                    label.get(CoreAnnotations.GoldAnswerAnnotation.class),
                    label.get(CoreAnnotations.CharAnnotation.class));
        }
    }

    /**
     * Reads a file of {@code word|||tag} tokens and prints, for every line, the words only, each
     * passed through the ATB tokenizer (options {@code removeProMarker,removeMorphMarker}) and
     * reduced to its first segment, with the segment marker kept when it directly follows.
     *
     * @param args exactly one element: the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.err.printf("Usage: java %s file > output%n", ArabicDocumentReaderAndWriter.class.getName());
            System.exit(-1);
        }
        TokenizerFactory<CoreLabel> atbFactory = ArabicTokenizer.atbFactory();
        atbFactory.setOptions("removeProMarker,removeMorphMarker");
        String marker = String.valueOf(DEFAULT_SEG_MARKER);

        // try-with-resources so the reader is closed on normal exit and on failure alike.
        try (BufferedReader reader = IOUtils.readerFromString(args[0])) {
            String line;
            while ((line = reader.readLine()) != null) {
                boolean first = true;
                for (String taggedToken : WHITESPACE.split(line)) {
                    String[] wordAndTag = TAG_DELIMITER_PATTERN.split(taggedToken);
                    assert wordAndTag.length == 2;
                    String word = wordAndTag[0];
                    List<CoreLabel> segments = atbFactory.getTokenizer(new StringReader(word)).tokenize();
                    if (!segments.isEmpty()) {
                        String head = segments.get(0).value();
                        if (segments.size() > 1) {
                            if (marker.equals(segments.get(1).value())) {
                                head = head + marker;
                            } else {
                                System.err.printf("%s: Raw token generates multiple segments: %s%n", ArabicDocumentReaderAndWriter.class.getName(), word);
                            }
                        }
                        word = head;
                    }
                    if (!first) {
                        System.out.print(" ");
                    }
                    System.out.print(word);
                    first = false;
                }
                System.out.println();
            }
        }
    }
}
