package de.unibamberg.minf.processing.service.online;

import com.ximpleware.AutoPilot;
import com.ximpleware.NavException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import de.unibamberg.minf.core.util.Stopwatch;
import de.unibamberg.minf.processing.exception.ResourceProcessingException;
import de.unibamberg.minf.processing.model.ContextAwareAutoPilot;
import de.unibamberg.minf.processing.service.base.BaseProcessingService;
import de.unibamberg.minf.processing.service.base.ProcessingService;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;

@Scope("prototype")
/* loaded from: input_file:BOOT-INF/lib/processing-core-4.7-SNAPSHOT.jar:de/unibamberg/minf/processing/service/online/OaiPmhHarvestingService.class */
public class OaiPmhHarvestingService extends BaseProcessingService implements ProcessingService, Runnable {
    /** Logger shared by this service; protected, so subclasses can use it as well. */
    protected static final Logger logger = LoggerFactory.getLogger((Class<?>) OaiPmhHarvestingService.class);
    /** Resource name constant; not referenced anywhere inside this class. */
    public static final String OAI_PMH_ENVELOPE_RESOURCE = "OAIPMH_ENVELOPE";
    /** Namespace URI of OAI-PMH 2.0; also the fallback returned by {@code getOaiPmhVersion} when no known namespace is found. */
    public static final String SCHEMA_NS_V2_0 = "http://www.openarchives.org/OAI/2.0/";
    // Verb-specific namespace URIs of OAI-PMH 1.1; each is recognised by getOaiPmhVersion.
    public static final String SCHEMA_NS_V1_1_GET_RECORD = "http://www.openarchives.org/OAI/1.1/OAI_GetRecord";
    public static final String SCHEMA_NS_V1_1_IDENTIFY = "http://www.openarchives.org/OAI/1.1/OAI_Identify";
    public static final String SCHEMA_NS_V1_1_LIST_IDENTIFIERS = "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers";
    public static final String SCHEMA_NS_V1_1_LIST_METADATA_FORMATS = "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats";
    public static final String SCHEMA_NS_V1_1_LIST_RECORDS = "http://www.openarchives.org/OAI/1.1/OAI_ListRecords";
    public static final String SCHEMA_NS_V1_1_LIST_SETS = "http://www.openarchives.org/OAI/1.1/OAI_ListSets";
    // XPath evaluators, (re)compiled by compileAutoPilots for every response:
    // "//pmh:ListRecords", "pmh:resumptionToken", "pmh:record" and "@completeListSize" respectively.
    private ContextAwareAutoPilot apExprListRecords;
    private ContextAwareAutoPilot apExprResToken;
    private ContextAwareAutoPilot apExprRecord;
    private ContextAwareAutoPilot apExprListSize;
    // Parser and navigator of the most recently downloaded response; replaced on every request in run().
    private VTDGen vg;
    private VTDNav vn;
    /** Minimum duration of one request cycle; run() sleeps for the remainder. Logged as milliseconds. */
    private int politenessTimespan;
    /** Directory into which run() writes each response as a zero-padded, numbered .xml file. */
    private String crawlDir;
    /** Base URL of the endpoint; "?verb=ListRecords" is appended directly to it. */
    private String url;
    /** Optional value of the "set" request argument; omitted when null or empty. */
    private String set;
    /** Joda pattern used by convertDateToUtcString. NOTE(review): never assigned anywhere in this class. */
    private String dateTimePattern;
    /** Value of the "metadataPrefix" request argument; mandatory for the initial request. */
    private String prefix;
    /** Resumption token extracted from the last response; null before the first request. */
    private String currentResumptionToken;
    /** Maximum number of HTTP redirects followed per download; no setter is defined in this class. */
    private int maxRedirects = 10;
    /** Maximum crawl duration in minutes (default 4320, i.e. three days); values <= 0 disable the check. */
    private int overallTimeoutMinutes = 4320;
    /** Error budget for one crawl; only enforced when > 0, so the default of -1 means "unlimited". */
    private int maxErrors = -1;
    /** Total list size as parsed from the completeListSize attribute; 0 until a value was parsed. */
    private long size = 0;
    /** Number of record elements counted so far across all responses. */
    private long processed = 0;

    /**
     * @return the minimum duration of one request cycle; {@code run()} sleeps for whatever
     *         part of it the request itself did not consume (logged there as milliseconds)
     */
    public int getPolitenessTimespan() {
        return this.politenessTimespan;
    }

    /**
     * @param i the minimum duration of one request cycle; values &lt;= 0 result in no sleep
     */
    public void setPolitenessTimespan(int i) {
        this.politenessTimespan = i;
    }

    /**
     * @return the maximum overall crawl duration in minutes
     */
    public int getOverallTimeoutMinutes() {
        return this.overallTimeoutMinutes;
    }

    /**
     * @param i the maximum overall crawl duration in minutes; the check in {@code run()}
     *          is only performed for values &gt; 0
     */
    public void setOverallTimeoutMinutes(int i) {
        this.overallTimeoutMinutes = i;
    }

    /**
     * @return the number of errors tolerated before the crawl is aborted
     */
    public int getMaxErrors() {
        return this.maxErrors;
    }

    /**
     * @param i the number of errors tolerated before the crawl is aborted; the limit is
     *          only enforced for values &gt; 0
     */
    public void setMaxErrors(int i) {
        this.maxErrors = i;
    }

    /**
     * @return the directory into which downloaded responses are written
     */
    public String getCrawlDir() {
        return this.crawlDir;
    }

    /**
     * @param str the directory into which downloaded responses are written
     */
    public void setCrawlDir(String str) {
        this.crawlDir = str;
    }

    /**
     * @return the base URL of the OAI-PMH endpoint
     */
    public String getUrl() {
        return this.url;
    }

    /**
     * @param str the base URL of the OAI-PMH endpoint; the query string
     *            ({@code ?verb=ListRecords...}) is appended to it verbatim
     */
    public void setUrl(String str) {
        this.url = str;
    }

    /**
     * @return the value sent as the {@code set} request argument, may be null
     */
    public String getSet() {
        return this.set;
    }

    /**
     * @param str the value sent as the {@code set} request argument; null or empty omits it
     */
    public void setSet(String str) {
        this.set = str;
    }

    /**
     * @return the value sent as the {@code metadataPrefix} request argument
     */
    public String getPrefix() {
        return this.prefix;
    }

    /**
     * @param str the value sent as the {@code metadataPrefix} request argument; building the
     *            initial request fails when it is null or blank
     */
    public void setPrefix(String str) {
        this.prefix = str;
    }

    /**
     * Harvests the configured endpoint with consecutive {@code ListRecords} requests until the
     * server stops handing out a resumption token.
     *
     * <p>Each response is written to {@code crawlDir} as {@code %08d.xml}, parsed, and passed to
     * {@link #processAdapterContent}. The listener (if any) is told about start, progress and
     * exactly one terminal outcome: {@code error} or {@code finished}.
     *
     * <p>The crawl is aborted with an {@code error} notification when
     * <ul>
     *   <li>cancellation was requested or the executing thread was interrupted,</li>
     *   <li>a fatal {@code OaiPmhHarvestingException} occurred (this includes exceeding
     *       {@code overallTimeoutMinutes}), or</li>
     *   <li>{@code maxErrors} is positive and has been exceeded.</li>
     * </ul>
     * Other errors are counted and the loop continues with the current resumption token.
     *
     * <p>NOTE(review): a non-fatal error with a resumption token in place is retried immediately,
     * without the politeness delay and without the overall-timeout check; with the default
     * {@code maxErrors} of -1 a persistently failing request is retried indefinitely.
     */
    @Override // java.lang.Runnable
    public void run() {
        if (getListener() != null) {
            getListener().start(getUuid());
        }
        Stopwatch overallWatch = new Stopwatch();
        Stopwatch requestWatch = new Stopwatch();
        String requestUrl = "";
        int fileIndex = 1;
        int errorCount = 0;
        int filesProcessed = 0;
        overallWatch.start();
        this.currentResumptionToken = null;
        do {
            requestWatch.start();
            logger.info("Crawling from url [{}]; index [{}]; resumption token [{}]", this.url, fileIndex - 1, this.currentResumptionToken);
            try {
                if (isCancellationRequested()) {
                    throw new ResourceProcessingException("Service cancellation has been requested");
                }
                String targetFile = String.format("%s%08d.xml", this.crawlDir + File.separator, fileIndex);
                requestUrl = buildRequestUrl(this.currentResumptionToken, null, null, this.set, getPrefix());
                byte[] response = downloadByteArray(new URL(requestUrl).toURI());
                logger.debug("Request to url [{}] returned {} bytes", requestUrl, response.length);
                writeToFile(response, targetFile);

                this.vg = new VTDGen();
                this.vg.setDoc(response);
                this.vg.parse(true);
                this.vn = this.vg.getNav();
                Map<String, String> namespaces = getNamespaces(this.vn, ".");
                namespaces.put("http://www.w3.org/XML/1998/namespace", "xml");
                // Updates currentResumptionToken, processed and (once) size.
                processAdapterContent(this.vn, namespaces);

                // Be polite: make every request cycle last at least politenessTimespan.
                long remaining = this.politenessTimespan - requestWatch.getElapsedTime();
                if (remaining > 0) {
                    logger.debug("Crawl sleeping {}ms due to politeness setting ({}ms) [{}] ", remaining, getPolitenessTimespan(), requestUrl);
                    Thread.sleep(remaining);
                }
                // 60000L: do the multiplication in long so large timeouts cannot overflow int.
                if (this.overallTimeoutMinutes > 0 && overallWatch.getElapsedTime() > this.overallTimeoutMinutes * 60000L) {
                    throw new OaiPmhHarvestingException(String.format("Crawl exceeded the maximum allowed duration of %s minutes", Integer.valueOf(this.overallTimeoutMinutes)), true);
                }
                // Count the file regardless of whether anybody is listening.
                filesProcessed++;
                if (getListener() != null) {
                    getListener().processed(getUuid(), this.processed);
                }
                fileIndex++;
            } catch (Exception e) {
                logger.error("Could not get or process response of {}", requestUrl, e);
                errorCount++;
                if (e instanceof InterruptedException) {
                    // Preserve the interrupt for the executor and stop instead of retrying.
                    Thread.currentThread().interrupt();
                    logger.error("Crawl thread has been interrupted. Cancelling crawl.");
                    notifyHarvestError();
                    return;
                }
                if (isCancellationRequested()) {
                    logger.error("Service cancellation has been requested. Cancelling crawl.");
                    notifyHarvestError();
                    return;
                }
                if ((e instanceof OaiPmhHarvestingException) && ((OaiPmhHarvestingException) e).isFatal()) {
                    logger.error("A fatal harvesting error occurred. Cancelling crawl.");
                    notifyHarvestError();
                    return;
                }
                if (this.maxErrors > 0 && errorCount > this.maxErrors) {
                    logger.error("Error count exceeded configured value for maximum acceptable errors ({})", this.maxErrors);
                    notifyHarvestError();
                    return;
                }
            } finally {
                // Runs on every path, including the early returns above.
                requestWatch.stop().reset();
            }
        } while (this.currentResumptionToken != null && !this.currentResumptionToken.isEmpty());

        // Exactly one terminal notification: either error or finished, never both.
        if (filesProcessed == 0 && errorCount > 0) {
            logger.error("Completed crawl from url [{}] with errors; no files processed", this.url);
            notifyHarvestError();
            return;
        }
        if (filesProcessed == 0) {
            logger.info("Completed crawl from url [{}]; no files processed", this.url);
        } else {
            logger.info("Completed crawl from url [{}]; {} file(s) written", this.url, fileIndex - 1);
        }
        if (getListener() != null) {
            getListener().finished(getUuid());
        }
    }

    /** Reports a failed crawl to the listener, if one is registered. */
    private void notifyHarvestError() {
        if (getListener() != null) {
            getListener().error(getUuid());
        }
    }

    /**
     * Downloads the resource behind {@code uri} into memory, following HTTP redirects manually
     * (at most {@code maxRedirects} of them) so that cross-protocol redirects work as well.
     *
     * <p>This method never throws {@link IOException}: any I/O failure (including too many
     * redirects, a redirect without {@code Location} header, or an HTTP error status) is logged
     * and reported to the caller as an empty array.
     *
     * @param uri absolute URI of the resource; {@code null} is logged and yields an empty array
     * @return the response body, or an empty array when the download failed
     * @throws IllegalArgumentException if {@code uri} is not absolute (from {@link URI#toURL()})
     */
    public byte[] downloadByteArray(URI uri) {
        if (uri == null) {
            logger.error("Either download URL or fileName was not specified or both.");
            return new byte[0];
        }
        try {
            URL target = uri.toURL();
            int redirects = 0;
            while (true) {
                URLConnection connection = target.openConnection();
                if (connection instanceof HttpURLConnection) {
                    HttpURLConnection http = (HttpURLConnection) connection;
                    http.setInstanceFollowRedirects(false);
                    int status = http.getResponseCode();
                    if (isRedirectStatus(status)) {
                        String location = http.getHeaderField("Location");
                        http.disconnect();
                        if (location == null || location.isEmpty()) {
                            throw new IOException("Redirect (HTTP " + status + ") without Location header from " + target);
                        }
                        redirects++;
                        if (redirects > this.maxRedirects) {
                            throw new IOException("Too many redirects, configured maximum: " + this.maxRedirects);
                        }
                        // Resolving against the current URL handles absolute targets as well as
                        // every relative form, and keeps a non-default port.
                        target = new URL(target, location);
                        continue;
                    }
                }
                logger.debug("Embedding file {}; {} redirects", uri, Integer.valueOf(redirects));
                // Open the stream exactly once and make sure it is closed again.
                try (InputStream body = connection.getInputStream()) {
                    return IOUtils.toByteArray(body);
                }
            }
        } catch (IOException e) {
            logger.error(String.format("Failed to embed file from [%s]", uri.getPath()), (Throwable) e);
            return new byte[0];
        }
    }

    /** Tells whether an HTTP status asks the client to repeat the GET at another location. */
    private static boolean isRedirectStatus(int status) {
        return status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_SEE_OTHER
                || status == 307
                || status == 308;
    }

    /**
     * Evaluates one parsed {@code ListRecords} response: counts its record elements into
     * {@code processed}, stores the resumption token for the next request and, as long as no
     * total size is known yet, tries to read it from the {@code completeListSize} attribute.
     *
     * @param vTDNav navigator positioned on the parsed response
     * @param map    namespace URI to prefix mapping detected in the response; only used to pick
     *               the OAI-PMH namespace that gets bound to the {@code pmh} prefix
     * @throws OaiPmhHarvestingException fatal when the response contains no {@code ListRecords}
     *         element; any other failure is wrapped via the cause-only constructor
     */
    protected void processAdapterContent(VTDNav vTDNav, Map<String, String> map) throws OaiPmhHarvestingException {
        Map<String, String> pmhNamespaces = new HashMap<>();
        pmhNamespaces.put(getOaiPmhVersion(map), "pmh");
        try {
            compileAutoPilots(vTDNav, pmhNamespaces);
            vTDNav.push();
            try {
                if (this.apExprListRecords.eval() < 0) {
                    throw new OaiPmhHarvestingException("Invalid response to OAI-PMH ListRecords command", true);
                }
                while (this.apExprRecord.eval() != -1) {
                    this.processed++;
                }
                this.currentResumptionToken = evalToString(vTDNav, this.apExprResToken);
                if (this.size == 0) {
                    readCompleteListSize(vTDNav);
                }
            } finally {
                // Keep the navigator's context stack balanced even when evaluation fails.
                vTDNav.pop();
            }
        } catch (OaiPmhHarvestingException e) {
            logger.error("An exception occurred when harvesting OAI-PMH data", (Throwable) e);
            throw e;
        } catch (Exception e) {
            logger.error("An exception occurred when harvesting OAI-PMH data", (Throwable) e);
            throw new OaiPmhHarvestingException(e);
        }
    }

    /**
     * Best-effort read of the total list size; a missing or unparsable value is not an error
     * and merely leaves {@code size} at 0 so the next response is tried again.
     */
    private void readCompleteListSize(VTDNav vTDNav) {
        try {
            this.size = Long.parseLong(evalToString(vTDNav, this.apExprListSize));
            if (this.size > 0 && getListener() != null) {
                getListener().updateSize(getUuid(), this.size);
            }
        } catch (Exception ignored) {
            logger.debug("No usable completeListSize in response: {}", ignored.toString());
        }
    }

    /**
     * Stores a downloaded response on disk, replacing a file that may already exist at the
     * given path. Failures are logged and otherwise ignored, so a problem with the local copy
     * never interrupts the harvest itself.
     *
     * @param bArr content to write
     * @param str  path of the target file
     */
    private void writeToFile(byte[] bArr, String str) {
        try {
            final File target = new File(str);
            // Remove a leftover from an earlier crawl before writing the new content.
            if (target.exists()) {
                target.delete();
            }
            FileUtils.writeByteArrayToFile(target, bArr);
        } catch (Exception writeFailure) {
            logger.error("Failed to write XML content to file", (Throwable) writeFailure);
        }
    }

    /**
     * Builds the URL of the next {@code ListRecords} request.
     *
     * <p>When a resumption token is given it is the only argument sent besides the verb.
     * Otherwise the optional {@code from}, {@code until} and {@code set} arguments are
     * appended, followed by the mandatory {@code metadataPrefix}.
     *
     * @param str       resumption token of the previous response; null or empty for the
     *                  initial request
     * @param dateTime  lower bound for selective harvesting, or null
     * @param dateTime2 upper bound for selective harvesting, or null
     * @param str2      set to harvest, or null/empty for all sets
     * @param str3      metadata prefix; required unless a resumption token is given
     * @return the complete request URL
     * @throws UnsupportedEncodingException never in practice, UTF-8 is always available
     * @throws OaiPmhHarvestingException fatal, when no metadata prefix is available for an
     *         initial request
     */
    private String buildRequestUrl(String str, DateTime dateTime, DateTime dateTime2, String str2, String str3) throws UnsupportedEncodingException, OaiPmhHarvestingException {
        StringBuilder request = new StringBuilder(this.url);
        request.append("?verb=ListRecords");
        // Compare by content: a reference comparison against "" misses non-interned empty strings.
        if (str != null && !str.isEmpty()) {
            // The token is taken verbatim from the server's XML and may contain characters
            // that are reserved in a query string, so it has to be encoded.
            request.append("&resumptionToken=").append(URLEncoder.encode(str, "UTF-8"));
            return request.toString();
        }
        if (dateTime != null) {
            request.append("&from=").append(convertDateToUtcString(dateTime));
        }
        if (dateTime2 != null) {
            request.append("&until=").append(convertDateToUtcString(dateTime2));
        }
        if (str2 != null && !str2.isEmpty()) {
            request.append("&set=").append(str2);
        }
        if (str3 == null || str3.trim().isEmpty()) {
            throw new OaiPmhHarvestingException("No metadataPrefix specified. Cannot build url for OAI-PMH request", true);
        }
        request.append("&metadataPrefix=").append(str3);
        return request.toString();
    }

    /**
     * Reads a string value at the navigator's position.
     *
     * <p>For attribute expressions the auto pilot is evaluated and the token following the
     * matched one (the attribute value) is returned. For all other expressions the text of the
     * navigator's current element is returned.
     *
     * <p>NOTE(review): in the non-attribute case {@code contextAwareAutoPilot} is not evaluated
     * here, so the result depends on where the cursor already is - confirm that callers
     * position it as intended.
     *
     * @param vTDNav                navigator to read from
     * @param contextAwareAutoPilot expression wrapper; assumed to return a negative index from
     *                              {@code eval()} when nothing matches, as the other callers in
     *                              this class treat it
     * @return the string value, or null when there is no match / no text
     * @throws NavException       if the navigator cannot convert the token
     * @throws XPathEvalException if evaluating the expression fails
     */
    protected String evalToString(VTDNav vTDNav, ContextAwareAutoPilot contextAwareAutoPilot) throws NavException, XPathEvalException {
        int valueIndex;
        if (contextAwareAutoPilot.isAttribute()) {
            int attributeIndex = contextAwareAutoPilot.eval();
            if (attributeIndex < 0) {
                // Check before adding 1: "-1 + 1" would otherwise pass as valid token index 0.
                return null;
            }
            valueIndex = attributeIndex + 1;
        } else {
            valueIndex = vTDNav.getText();
        }
        if (valueIndex < 0) {
            return null;
        }
        return vTDNav.toString(valueIndex);
    }

    /**
     * Picks the OAI-PMH namespace used by a response.
     *
     * <p>The keys of {@code map} are visited in the map's iteration order; the first key that
     * equals one of the known OAI-PMH 2.0 / 1.1 namespace URIs wins. When none matches, a
     * warning is logged and the 2.0 namespace is returned.
     *
     * @param map namespace URI to prefix mapping of the response
     * @return the detected namespace URI, never null
     */
    private String getOaiPmhVersion(Map<String, String> map) {
        final String[] knownNamespaces = {
            SCHEMA_NS_V2_0,
            SCHEMA_NS_V1_1_GET_RECORD,
            SCHEMA_NS_V1_1_IDENTIFY,
            SCHEMA_NS_V1_1_LIST_IDENTIFIERS,
            SCHEMA_NS_V1_1_LIST_METADATA_FORMATS,
            SCHEMA_NS_V1_1_LIST_RECORDS,
            SCHEMA_NS_V1_1_LIST_SETS
        };
        for (String declared : map.keySet()) {
            for (String known : knownNamespaces) {
                if (declared.equals(known)) {
                    return known;
                }
            }
        }
        logger.warn(String.format("Could not identify correct OAI-PMH namespace; using default: [%s]", SCHEMA_NS_V2_0));
        return SCHEMA_NS_V2_0;
    }

    /**
     * Formats a date for use in the {@code from} / {@code until} request arguments.
     *
     * <p>When {@code dateTimePattern} is configured, the date is printed with exactly that
     * pattern. When it is not (this class never assigns it), the date is converted to UTC and
     * printed as {@code yyyy-MM-dd'T'HH:mm:ss'Z'} instead of failing on a null pattern.
     *
     * @param dateTime the date to format, may be null
     * @return the formatted date, or null when {@code dateTime} is null
     * @throws IllegalArgumentException if the configured pattern is invalid
     */
    protected String convertDateToUtcString(DateTime dateTime) throws IllegalArgumentException {
        if (dateTime == null) {
            return null;
        }
        if (this.dateTimePattern == null || this.dateTimePattern.trim().isEmpty()) {
            // The trailing 'Z' is a literal, so the value has to be shifted to UTC explicitly.
            return DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").withZoneUTC().print(dateTime);
        }
        return DateTimeFormat.forPattern(this.dateTimePattern).print(dateTime);
    }

    /**
     * Compiles the four XPath evaluators needed to process a response and binds them to the
     * given navigator.
     *
     * @param vTDNav navigator of the response that is about to be processed
     * @param map    namespace URI to prefix mapping; must bind the prefix {@code pmh}
     * @throws XPathParseException if one of the expressions could not be compiled
     */
    private void compileAutoPilots(VTDNav vTDNav, Map<String, String> map) throws XPathParseException {
        this.apExprListRecords = requireAutoPilot(vTDNav, map, false, "//pmh:ListRecords");
        this.apExprRecord = requireAutoPilot(vTDNav, map, false, "pmh:record");
        this.apExprResToken = requireAutoPilot(vTDNav, map, false, "pmh:resumptionToken");
        this.apExprListSize = requireAutoPilot(vTDNav, map, true, "@completeListSize");
    }

    /**
     * Like {@code createAutoPilot}, but turns its "null on failure" result into an exception
     * so that a broken expression is reported where it happens instead of as a later NPE.
     */
    private ContextAwareAutoPilot requireAutoPilot(VTDNav vTDNav, Map<String, String> map, boolean attribute, String xpath) throws XPathParseException {
        ContextAwareAutoPilot compiled = createAutoPilot(vTDNav, map, attribute, xpath);
        if (compiled == null) {
            throw new XPathParseException("Failed to compile XPath expression [" + xpath + "]");
        }
        return compiled;
    }

    /**
     * Creates an XPath evaluator for the given navigator.
     *
     * <p>Every entry of {@code map} is declared on the evaluator as a namespace binding
     * (value = prefix, key = namespace URI) before the expression is selected.
     *
     * @param vTDNav navigator the evaluator is bound to
     * @param map    namespace URI to prefix mapping
     * @param z      passed through to the {@code ContextAwareAutoPilot}; callers in this class
     *               pass true for attribute expressions
     * @param str    the XPath expression
     * @return the wrapped evaluator, or null when selecting the expression failed (the failure
     *         is logged)
     * @throws XPathParseException declared for callers; failures of the selection itself are
     *         caught here
     */
    protected ContextAwareAutoPilot createAutoPilot(VTDNav vTDNav, Map<String, String> map, boolean z, String str) throws XPathParseException {
        final AutoPilot pilot = new AutoPilot(vTDNav);
        for (Map.Entry<String, String> binding : map.entrySet()) {
            pilot.declareXPathNameSpace(binding.getValue(), binding.getKey());
        }
        ContextAwareAutoPilot result;
        try {
            pilot.selectXPath(str);
            result = new ContextAwareAutoPilot(pilot, str, z);
        } catch (Exception selectionFailure) {
            logger.error("Failed to select XPath", (Throwable) selectionFailure);
            result = null;
        }
        return result;
    }

    /**
     * Collects the namespace URIs declared in the document, starting with the token right
     * after the node selected by {@code str} and running to the end of the token buffer.
     *
     * <p>Each distinct URI is mapped to a generated prefix {@code pf0}, {@code pf1}, ... in
     * order of first appearance. The navigator's position is restored before returning, also
     * when an exception is thrown.
     *
     * @param vTDNav navigator of the parsed document
     * @param str    XPath expression selecting the node to start from
     * @return sorted, mutable map of namespace URI to generated prefix; empty if none declared
     * @throws XPathParseException if {@code str} is not a valid expression
     * @throws XPathEvalException  if evaluating {@code str} fails
     * @throws NavException        if a token cannot be read
     */
    protected Map<String, String> getNamespaces(VTDNav vTDNav, String str) throws XPathParseException, XPathEvalException, NavException {
        Map<String, String> namespaces = new TreeMap<>();
        vTDNav.push();
        try {
            AutoPilot autoPilot = new AutoPilot(vTDNav);
            autoPilot.selectXPath(str);
            autoPilot.evalXPath();
            int index = vTDNav.getCurrentIndex() + 1;
            int tokenCount = vTDNav.getTokenCount();
            int prefixCounter = 0;
            while (index < tokenCount) {
                int tokenType = vTDNav.getTokenType(index);
                if (tokenType == VTDNav.TOKEN_ATTR_NAME) {
                    // Ordinary attribute: skip name and value. Going back through the loop
                    // condition keeps a document that ends on an attribute from reading past
                    // the token buffer.
                    index += 2;
                } else if (tokenType == VTDNav.TOKEN_ATTR_NS) {
                    // Namespace declaration: the following token holds the URI.
                    if (index + 1 < tokenCount) {
                        String namespaceUri = vTDNav.toString(index + 1);
                        if (!namespaces.containsKey(namespaceUri)) {
                            namespaces.put(namespaceUri, "pf" + prefixCounter);
                            prefixCounter++;
                        }
                    }
                    index += 2;
                } else {
                    index++;
                }
            }
        } finally {
            vTDNav.pop();
        }
        if (logger.isDebugEnabled()) {
            if (!namespaces.isEmpty()) {
                logger.debug(String.format("Identified content namespaces [%s]", namespaces.keySet()));
            } else {
                logger.debug("No content namespaces identified");
            }
        }
        return namespaces;
    }
}
