package eu.dariah.de.search.crawling.crawler;

import de.unibamberg.minf.core.util.Stopwatch;
import de.unibamberg.minf.dme.model.base.Grammar;
import de.unibamberg.minf.dme.model.base.Nonterminal;
import de.unibamberg.minf.dme.model.function.FunctionImpl;
import de.unibamberg.minf.dme.model.grammar.GrammarImpl;
import de.unibamberg.minf.dme.model.mapping.base.MappedConcept;
import de.unibamberg.minf.mapping.model.MappingExecGroup;
import de.unibamberg.minf.mapping.service.MappingExecutionService;
import de.unibamberg.minf.processing.consumption.CollectingResourceConsumptionServiceImpl;
import de.unibamberg.minf.processing.exception.ProcessingConfigException;
import de.unibamberg.minf.processing.model.base.Resource;
import de.unibamberg.minf.processing.model.helper.ResourceHelper;
import de.unibamberg.minf.processing.service.base.BaseResourceProcessingServiceImpl;
import eu.dariah.de.search.config.MainConfigProperties;
import eu.dariah.de.search.crawling.CrawlHelper;
import eu.dariah.de.search.crawling.files.FileDownloader;
import eu.dariah.de.search.model.Crawl;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.model.ExtendedMappingContainer;
import eu.dariah.de.search.service.DatamodelService;
import eu.dariah.de.search.service.MappingService;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.UUID;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;

/* loaded from: input_file:BOOT-INF/lib/search-core-4.3.24-RELEASE.jar:eu/dariah/de/search/crawling/crawler/RepetitiveFileCrawlerImpl.class */
/**
 * File crawler that keeps downloading follow-up files for an endpoint.
 *
 * <p>After each download the file is processed against the source datamodel and mapped to the
 * configured crawling datamodel. Every mapped resource is inspected for {@code GET.Param} and
 * {@code GET.RemoveParam} values, from which the next access URLs are rendered. URLs that have
 * not been queued before are downloaded one after another, honoring the politeness timespan
 * between two downloads.
 *
 * <p>When no crawling datamodel, mapping or matching processing service is available, the crawler
 * performs only the single download of its superclass and then reports the crawl as finished.
 */
public class RepetitiveFileCrawlerImpl extends FileDownloader implements ApplicationContextAware {
    private ApplicationContext appContext;

    @Autowired
    private MainConfigProperties mainConfig;

    @Autowired
    protected DatamodelService datamodelService;

    @Autowired
    private MappingService mappingService;

    @Autowired
    private MappingExecutionService mappingExecutionService;

    /** Maps an endpoint file type to the bean name of the processing service handling it. */
    private Map<String, String> fileProcessingServiceMap;
    private Endpoint endpoint;
    private Crawl crawl;
    private ExtendedDatamodelContainer sourceDatamodel;
    private ExtendedDatamodelContainer targetDatamodel;
    private ExtendedMappingContainer mapping;
    private BaseResourceProcessingServiceImpl processingService;

    /** Non-null only when repetitive crawling is fully configured for the current endpoint. */
    private MappingExecGroup mExecGroup;

    /** Every URL that has ever been queued; used for de-duplication and the output file suffix. */
    private Set<String> processedUrls;

    /** URLs that still have to be downloaded, in discovery order. */
    private Queue<String> downloadUris;
    private int politenessTimespan = 1000;
    private CollectingResourceConsumptionServiceImpl sourceResCollector = null;
    private CollectingResourceConsumptionServiceImpl targetResCollector = null;
    Stopwatch swPoliteness = new Stopwatch();
    long currentSize = 0;
    long overallSize = 0;

    /**
     * Returns the superclass' output filename suffixed with {@code "."} and the number of URLs
     * queued so far ({@code 0} while repetitive crawling is not initialized).
     */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader, eu.dariah.de.search.crawling.files.BaseFileStreamCrawler
    public String getOutputFilename() {
        return super.getOutputFilename() + "." + (this.processedUrls == null ? 0 : this.processedUrls.size());
    }

    /** Stores the application context and assigns a random UUID as base output filename. */
    @Override // org.springframework.context.ApplicationContextAware
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        this.appContext = applicationContext;
        this.outputFilename = UUID.randomUUID().toString();
    }

    /**
     * Initializes the crawler for the given endpoint and crawl.
     *
     * <p>Repetitive crawling is only enabled (URL bookkeeping created, {@code mExecGroup} set)
     * when the datamodels, the mapping and the processing service could all be resolved.
     *
     * @param endpoint the endpoint to crawl
     * @param crawl the crawl this download belongs to
     * @param extendedDatamodelContainer datamodel of the crawled data; used as source model when
     *     the endpoint has no dedicated access model
     */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader, eu.dariah.de.search.crawling.files.BaseFileStreamCrawler, eu.dariah.de.search.crawling.crawler.Crawler
    public void init(Endpoint endpoint, Crawl crawl, ExtendedDatamodelContainer extendedDatamodelContainer) {
        super.init(endpoint, crawl, extendedDatamodelContainer);
        this.endpoint = endpoint;
        this.crawl = crawl;

        // Drop everything a previous init() may have left behind; otherwise a stale mExecGroup
        // could enable repetitive crawling without a matching processing service.
        this.processingService = null;
        this.mExecGroup = null;
        this.processedUrls = null;
        this.downloadUris = null;

        initCrawlModels(extendedDatamodelContainer);
        if (this.mapping == null) {
            return;
        }
        initServices();
        if (this.processingService == null || this.mExecGroup == null) {
            return;
        }
        this.processedUrls = new HashSet<>();
        this.downloadUris = new LinkedList<>();
    }

    /**
     * Downloads the current file and, if repetitive crawling is configured, every follow-up file
     * discovered in the downloaded content.
     *
     * <p>Follow-up files are handled in a loop rather than by re-entering this method, so the
     * call stack does not grow with the number of crawled files. The listener is notified once
     * the queue is exhausted or when a non-recoverable error occurs.
     */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader
    public void download() {
        downloadCurrentFile();
        if (this.mExecGroup == null) {
            this.logger.debug("Resumptive crawling not applicable -> crawl done");
            registerFinished();
            return;
        }
        while (true) {
            reset();
            processDownloadedFile();
            collectDownloadUris();
            // Both helpers have already notified the listener when they return false.
            if (!prepareNextDownload() || !awaitPoliteness()) {
                return;
            }
            downloadCurrentFile();
        }
    }

    /** Restarts the politeness stopwatch and lets the superclass download the current input URI. */
    private void downloadCurrentFile() {
        this.swPoliteness.start();
        super.download();
    }

    /** Notifies the listener, if any, that the whole crawl has finished. */
    protected void registerFinished() {
        if (getListener() != null) {
            getListener().finished(getUuid());
        }
    }

    /** Notifies the listener, if any, that the crawl ran into an error. */
    protected void registerError() {
        if (getListener() != null) {
            getListener().error(getUuid());
        }
    }

    /**
     * Records the size of the current file, adds it to the overall size and reports the overall
     * size to the listener.
     *
     * @param j size of the file currently being downloaded
     */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader
    protected void updateFileSize(long j) {
        this.currentSize = j;
        this.overallSize += j;
        if (getListener() != null) {
            getListener().updateSize(getUuid(), this.overallSize);
        }
    }

    /**
     * Reports progress to the listener as the size of all previous files plus the progress within
     * the current file.
     *
     * @param j amount processed of the current file
     */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader
    protected void updateFileProcessed(long j) {
        if (getListener() != null) {
            getListener().processed(getUuid(), (this.overallSize - this.currentSize) + j);
        }
    }

    /** Resets the current file size; the listener is only informed once the whole crawl is done. */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader
    protected void registerFileFinished() {
        this.currentSize = 0L;
    }

    /** Does nothing: this hook does not report a failed single file to the listener. */
    @Override // eu.dariah.de.search.crawling.files.FileDownloader
    protected void registerFileError() {
    }

    /**
     * Resolves source datamodel, target (crawling) datamodel and the mapping between them.
     * Leaves {@code mapping} as {@code null} when any of them is unavailable.
     */
    private void initCrawlModels(ExtendedDatamodelContainer extendedDatamodelContainer) {
        this.mapping = null;
        if (this.mainConfig.getDatamodels().getCrawling() == null) {
            this.logger.warn("No GS: Repetitive Crawl Model configured; repetitive file crawling unavailable");
            return;
        }

        if (this.endpoint.getAccessModelId() == null) {
            this.logger.debug("No dedicated access modell, using datamodel: {}", extendedDatamodelContainer.getModel().getId());
            this.sourceDatamodel = extendedDatamodelContainer;
        } else {
            this.logger.debug("Dedicated access modell configured: {}", this.endpoint.getAccessModelId());
            this.sourceDatamodel = this.datamodelService.findById(this.endpoint.getAccessModelId());
            if (this.sourceDatamodel == null) {
                this.logger.warn("Dedicated access modell configured but not available (Sync with DME required?)");
                return;
            }
        }

        this.targetDatamodel = this.datamodelService.findById(this.mainConfig.getDatamodels().getCrawling());
        if (this.targetDatamodel == null) {
            this.logger.warn("Crawl modell configured but not available (Sync with DME required?)");
            return;
        }

        this.mapping = this.mappingService.getMappingBySourceAndTarget(this.sourceDatamodel.getModel().getId(), this.mainConfig.getDatamodels().getCrawling());
        if (this.mapping == null) {
            this.logger.info("No mapping to GS: Repetitive Crawl Model modeled; repetitive file crawling not configured");
        }
    }

    /**
     * Looks up the processing service for the endpoint's file type and wires the collectors and
     * the mapping execution group. On any failure both {@code processingService} and
     * {@code mExecGroup} are left {@code null}, which disables repetitive crawling.
     */
    private void initServices() {
        this.processingService = null;
        this.mExecGroup = null;
        if (this.fileProcessingServiceMap == null || !this.fileProcessingServiceMap.containsKey(this.endpoint.getFileType())) {
            this.logger.warn("Endpoint file type unsupported by repetitive crawling: {}", this.endpoint.getFileType());
            return;
        }
        try {
            this.processingService = this.appContext.getBean(this.fileProcessingServiceMap.get(this.endpoint.getFileType()), BaseResourceProcessingServiceImpl.class);
            this.processingService.setSchema(this.sourceDatamodel.getModel());
            this.processingService.setRoot((Nonterminal) this.sourceDatamodel.getOrRenderElementHierarchy());
            this.sourceResCollector = new CollectingResourceConsumptionServiceImpl();
            this.processingService.addConsumptionService(this.sourceResCollector);

            this.mExecGroup = buildMappingExecutionGroup(this.mapping, this.targetDatamodel);
            this.targetResCollector = new CollectingResourceConsumptionServiceImpl();
            this.mappingExecutionService.addConsumptionService(this.targetResCollector);

            // A mapping without any concept cannot yield follow-up URLs.
            if (this.mExecGroup != null && (this.mExecGroup.getConcepts() == null || this.mExecGroup.getConcepts().isEmpty())) {
                this.mExecGroup = null;
            }
        } catch (Exception e) {
            // Boundary catch: bean lookup and model rendering may fail in several ways. Keep the
            // crawler usable as plain downloader, but do not lose the cause.
            this.processingService = null;
            this.mExecGroup = null;
            this.logger.warn("No supporting processing service available for file type: {}", this.endpoint.getFileType(), e);
        }
    }

    /**
     * Builds the execution group for the given mapping.
     *
     * @param extendedMappingContainer mapping with its grammars and functions; may be {@code null}
     * @param extendedDatamodelContainer target datamodel providing the target element tree
     * @return the execution group, or {@code null} when no mapping was given
     */
    private MappingExecGroup buildMappingExecutionGroup(ExtendedMappingContainer extendedMappingContainer, ExtendedDatamodelContainer extendedDatamodelContainer) {
        if (extendedMappingContainer == null) {
            return null;
        }
        MappingExecGroup execGroup = new MappingExecGroup();
        execGroup.setMapping(extendedMappingContainer.getMapping());
        execGroup.setTargetSchemaId(extendedMappingContainer.getMapping().getTargetId());
        execGroup.setTargetElementTree(extendedDatamodelContainer.getOrRenderElementHierarchy());
        if (extendedMappingContainer.getMapping().getConcepts() == null) {
            return execGroup;
        }
        for (MappedConcept concept : extendedMappingContainer.getMapping().getConcepts()) {
            if (concept == null) {
                continue;
            }
            // NOTE(review): the grammars map is set once per concept, before the concept is added.
            // Whether MappingExecGroup merges or replaces it is not visible here - order preserved.
            execGroup.setGrammarsMap(collectGrammars(extendedMappingContainer.getId(), concept.getElementGrammarIdsMap(), extendedMappingContainer.getGrammars()));
            FunctionImpl function = new FunctionImpl(extendedMappingContainer.getMapping().getId(), concept.getFunctionId());
            if (extendedMappingContainer.getFunctions() != null && extendedMappingContainer.getFunctions().containsKey(concept.getFunctionId())) {
                function.setFunction(extendedMappingContainer.getFunctions().get(concept.getFunctionId()));
            }
            execGroup.addMappedConcept(concept, function);
        }
        return execGroup;
    }

    /**
     * Resolves the grammars referenced by a mapped concept.
     *
     * <p>Referenced grammar ids that cannot be resolved in {@code grammarsById} are represented
     * by a newly created passthrough grammar carrying the referenced id.
     *
     * @param entityId first constructor argument for newly created passthrough grammars
     * @param elementGrammarIds element id to grammar id; may be {@code null}
     * @param grammarsById known grammars by id; may be {@code null}
     * @return grammars keyed by their id, or {@code null} when {@code elementGrammarIds} is
     *     {@code null}
     */
    private Map<String, Grammar> collectGrammars(String entityId, Map<String, String> elementGrammarIds, Map<String, Grammar> grammarsById) {
        if (elementGrammarIds == null) {
            return null;
        }
        Map<String, Grammar> result = new HashMap<>();
        for (String grammarId : elementGrammarIds.values()) {
            Grammar grammar = (grammarId == null || grammarsById == null) ? null : grammarsById.get(grammarId);
            if (grammar == null) {
                GrammarImpl passthrough = new GrammarImpl(entityId, grammarId);
                passthrough.setId(grammarId);
                passthrough.setPassthrough(true);
                grammar = passthrough;
            }
            result.put(grammar.getId(), grammar);
        }
        return result;
    }

    /** Clears the resources collected for the previously processed file. */
    private void reset() {
        if (this.sourceResCollector != null) {
            this.sourceResCollector.setResources(null);
        }
        if (this.targetResCollector != null) {
            this.targetResCollector.setResources(null);
        }
    }

    /**
     * Parses the downloaded file into source resources and maps them to the crawling datamodel.
     * Failures are logged; the collectors then simply hold fewer (or no) resources.
     */
    private void processDownloadedFile() {
        this.logger.debug("Processing downloaded file: {}", getOutputPath());
        // The stream is only needed while the processing service runs, so close it right after.
        try (FileInputStream fileStream = new FileInputStream(getOutputPath())) {
            this.processingService.setInputStream(fileStream);
            this.processingService.init();
            this.processingService.run();
        } catch (ProcessingConfigException e) {
            this.logger.error("Exception while processing", e);
        } catch (IOException e) {
            this.logger.error("Exception while processing", e);
        }

        if (countResources(this.sourceResCollector) > 0) {
            try {
                this.mappingExecutionService.init(this.mExecGroup, this.sourceResCollector.getResources());
                this.mappingExecutionService.run();
            } catch (ProcessingConfigException e) {
                this.logger.error("Failed to initialize MappingExecutionService", e);
            }
        }
        this.logger.debug("Mapping execution transformed {} to {} resources", countResources(this.sourceResCollector), countResources(this.targetResCollector));
    }

    /** Returns the number of resources held by the collector, treating {@code null} as zero. */
    private static int countResources(CollectingResourceConsumptionServiceImpl collector) {
        return collector.getResources() == null ? 0 : collector.getResources().size();
    }

    /**
     * Renders an access URL for every mapped resource and queues those that have not been
     * queued before.
     */
    private void collectDownloadUris() {
        if (this.targetResCollector.getResources() == null) {
            return;
        }
        for (Resource resource : this.targetResCollector.getResources()) {
            String accessUrl = CrawlHelper.renderAccessUrl(this.endpoint.getUrl(), this.endpoint.getParams(), ResourceHelper.findRecursive(resource, "GET.Param"), ResourceHelper.findRecursive(resource, "GET.RemoveParam"));
            // Set.add doubles as the "seen before?" check.
            if (this.processedUrls.add(accessUrl)) {
                this.downloadUris.add(accessUrl);
            }
        }
    }

    /**
     * Takes the next usable URL from the queue and prepares paths and input URI for it.
     *
     * <p>An unparsable URL is logged and reported as error to the listener, then the next queued
     * URL is tried. An empty queue is reported as finished crawl.
     *
     * @return {@code true} if a download has been set up; {@code false} if the crawl ended, in
     *     which case the listener has already been notified
     */
    private boolean prepareNextDownload() {
        while (!this.downloadUris.isEmpty()) {
            try {
                setupPaths(this.crawl);
            } catch (MalformedURLException e) {
                this.logger.error("Failed to setup paths for crawl", e);
                registerError();
                return false;
            }
            this.inputURI = null;
            String nextUri = this.downloadUris.remove();
            this.logger.debug("Processing next downloadURI: {}", nextUri);
            try {
                this.inputURI = new URL(nextUri).toURI();
                return true;
            } catch (MalformedURLException | URISyntaxException e) {
                this.logger.error("Failed to setup and download next file URI", e);
                registerError();
            }
        }
        this.logger.debug("Download queue is empty -> file crawling is complete");
        registerFinished();
        return false;
    }

    /**
     * Sleeps for whatever is left of the politeness timespan since the last download started.
     *
     * @return {@code true} if the next download may start; {@code false} if the thread was
     *     interrupted, in which case the error has been reported and the interrupt flag restored
     */
    private boolean awaitPoliteness() {
        long remaining = this.politenessTimespan - this.swPoliteness.getElapsedTime();
        if (remaining <= 0) {
            return true;
        }
        this.logger.debug("Crawl sleeping {}ms due to politeness setting ({}ms)", remaining, getPolitenessTimespan());
        try {
            Thread.sleep(remaining);
            return true;
        } catch (InterruptedException e) {
            this.logger.error("Thead.sleep interrupted", e);
            registerError();
            Thread.currentThread().interrupt();
            return false;
        }
    }

    public Map<String, String> getFileProcessingServiceMap() {
        return this.fileProcessingServiceMap;
    }

    public void setFileProcessingServiceMap(Map<String, String> map) {
        this.fileProcessingServiceMap = map;
    }

    /** Returns the minimum time between the starts of two downloads. */
    public int getPolitenessTimespan() {
        return this.politenessTimespan;
    }

    public void setPolitenessTimespan(int i) {
        this.politenessTimespan = i;
    }
}
