package eu.dariah.de.search.crawling;

import de.unibamberg.minf.core.util.Stopwatch;
import eu.dariah.de.search.automation.TimeoutTimerTask;
import eu.dariah.de.search.es.service.AdminService;
import eu.dariah.de.search.mapping.MappingGenerationService;
import eu.dariah.de.search.model.Collection;
import eu.dariah.de.search.model.Crawl;
import eu.dariah.de.search.model.Dataset;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.service.CrawlService;
import eu.dariah.de.search.service.DatamodelService;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import org.joda.time.DateTime;
import org.joda.time.Period;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.Assert;

/* loaded from: input_file:BOOT-INF/lib/search-core-4.3.24-RELEASE.jar:eu/dariah/de/search/crawling/TimedCrawlManagerImpl.class */
public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCrawlManager {

    @Autowired
    private DatamodelService datamodelService;

    @Autowired
    private AdminService adminService;

    @Autowired
    private MappingGenerationService mappingGenerationService;
    private TimerTask timeoutTimerTask;
    private DateTime lastSyncTimestamp;
    private boolean maintenanceMode;
    protected boolean debugging;
    private boolean autocrawlOnline;
    private boolean autocrawlOffline;
    private int syncInterval;
    private int timeout;
    protected final Logger logger = LoggerFactory.getLogger((Class<?>) TimedCrawlManagerImpl.class);
    private Timer syncTimer = null;
    private ReentrantLock setupLock = new ReentrantLock();
    private Map<String, List<String>> endpointIdCrawlIdMap = new HashMap();
    private Map<String, List<String>> datamodelIdCrawlIdMap = new HashMap();
    private Map<String, List<String>> outdatedDatamodelIdCrawlIdMap = new HashMap();

    public boolean isDebugging() {
        return this.debugging;
    }

    public void setDebugging(boolean z) {
        this.debugging = z;
    }

    public boolean isAutocrawlOnline() {
        return this.autocrawlOnline;
    }

    public void setAutocrawlOnline(boolean z) {
        this.autocrawlOnline = z;
    }

    public boolean isAutocrawlOffline() {
        return this.autocrawlOffline;
    }

    public void setAutocrawlOffline(boolean z) {
        this.autocrawlOffline = z;
    }

    public int getSyncInterval() {
        return this.syncInterval;
    }

    public void setSyncInterval(int i) {
        this.syncInterval = i;
    }

    public int getTimeout() {
        return this.timeout;
    }

    public void setTimeout(int i) {
        this.timeout = i;
    }

    @Override // eu.dariah.de.search.crawling.TimedCrawlManager
    public DateTime getLastSyncTimestamp() {
        return this.lastSyncTimestamp;
    }

    public boolean isMaintenanceMode() {
        return this.maintenanceMode;
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, org.springframework.beans.factory.InitializingBean
    public void afterPropertiesSet() throws Exception {
        super.afterPropertiesSet();
        if (!this.autocrawlOffline && !this.autocrawlOnline) {
            this.logger.info("CrawlManager initialized without reocurring synchronization due to configuration");
            return;
        }
        if (this.autocrawlOnline || this.autocrawlOffline) {
            if (!this.debugging && this.syncInterval < 60) {
                this.syncInterval = 60;
                this.logger.warn("Sync interval increased to 60s");
            }
            if (!this.debugging && this.timeout < 30) {
                this.timeout = 30;
                this.logger.warn("Crawl timeout increased to 30s (which is probably still too short)");
            }
            this.syncTimer = new Timer();
            setupSyncTimer();
        }
    }

    @Override // eu.dariah.de.search.crawling.TimedCrawlManager
    public void synchronize() {
        Stopwatch start = new Stopwatch().start();
        if (this.debugging) {
            this.logger.debug("Executing synchronization");
        }
        try {
            this.statusMapslock.lock();
            if (this.autocrawlOnline) {
                enqueueNewAndOutdatedDatasets();
            }
            if (this.debugging) {
                this.logger.debug("Synchronization completed in {}ms", Long.valueOf(start.getElapsedTime()));
            }
        } catch (Exception e) {
            this.logger.error("Failed to execute crawl synchronization", (Throwable) e);
        } finally {
            this.statusMapslock.unlock();
        }
    }

    private void enqueueNewAndOutdatedDatasets() {
        DateTime now = DateTime.now();
        for (Collection collection : this.collectionService.getAll()) {
            boolean z = false;
            boolean z2 = (collection.getUpdatePeriod() == null || collection.getUpdatePeriod().isEmpty()) ? false : true;
            for (Endpoint endpoint : collection.getEndpoints()) {
                for (Dataset dataset : endpoint.getDatasets()) {
                    try {
                        if (dataset.isNew()) {
                            enqueueOnlineCrawlIfAvailable(collection, endpoint, dataset);
                        } else if (z2 && dataset.isOutdated()) {
                            enqueueOnlineCrawlIfAvailable(collection, endpoint, dataset);
                        } else {
                            List<Crawl> findCrawls = this.crawlService.findCrawls(endpoint.getId(), dataset.getId(), CrawlService.CrawlOnlineFlag.Online, CrawlService.CrawlCompleteFlag.Both, CrawlService.CrawlErrorFlag.Both, 1);
                            if (findCrawls == null || findCrawls.size() == 0) {
                                enqueueOnlineCrawlIfAvailable(collection, endpoint, dataset);
                            } else if (z2 && now.isAfter(findCrawls.get(0).getModified().plus(new Period(collection.getUpdatePeriod())))) {
                                dataset.setOutdated(true);
                                z = true;
                                enqueueOnlineCrawlIfAvailable(collection, endpoint, dataset);
                            }
                        }
                    } catch (Exception e) {
                        this.logger.error("Failed to process dataset for autocrawling", (Throwable) e);
                        dataset.setError(true);
                        z = true;
                    }
                }
            }
            if (z) {
                this.collectionService.saveCollection(collection);
            }
        }
    }

    private boolean enqueueOnlineCrawlIfAvailable(Collection collection, Endpoint endpoint, Dataset dataset) {
        if (this.outdatedDatamodelIdCrawlIdMap.containsKey(dataset.getId())) {
            if (!this.debugging) {
                return false;
            }
            this.logger.debug("Cannot enqueue online crawl due to outdated datamodel [{}] being reprocessed", dataset.getId());
            return false;
        }
        if (this.endpointIdCrawlIdMap.containsKey(endpoint.getId())) {
            if (!this.debugging) {
                return false;
            }
            this.logger.debug("Cannot enqueue online crawl due to endpoint [{}] already being busy", endpoint.getId());
            return false;
        }
        if (this.debugging) {
            this.logger.debug("Enqueued online crawl for endpoint [{}] and dataset [{}]", endpoint.getId(), dataset.getId());
        }
        performOnlineCrawl(collection, endpoint, this.datamodelService.findById(dataset.getId()));
        return true;
    }

    private int reindexDatamodel(ExtendedDatamodelContainer extendedDatamodelContainer) {
        String completedOnlineCrawlId;
        int i = 0;
        for (Collection collection : this.collectionService.findByDatamodelId(extendedDatamodelContainer.getId())) {
            for (Endpoint endpoint : collection.getEndpoints()) {
                for (Dataset dataset : endpoint.getDatasets()) {
                    if (dataset.getId().equals(extendedDatamodelContainer.getId()) && (completedOnlineCrawlId = getCompletedOnlineCrawlId(endpoint.getId(), dataset.getId())) != null) {
                        performOfflineCrawl(collection, endpoint, extendedDatamodelContainer, completedOnlineCrawlId);
                        if (this.debugging) {
                            this.logger.debug("");
                        }
                        i++;
                    }
                }
            }
        }
        this.logger.debug("Reindexing {} datasets of datamodel {}", Integer.valueOf(i), extendedDatamodelContainer.getId());
        return i;
    }

    private boolean recreateIndex(ExtendedDatamodelContainer extendedDatamodelContainer) {
        try {
            Assert.notNull(extendedDatamodelContainer);
            Assert.notNull(extendedDatamodelContainer.getIndexName());
            Assert.notNull(extendedDatamodelContainer.getId());
            if (this.adminService.getIndexExists(extendedDatamodelContainer.getIndexName())) {
                this.adminService.dropIndex(extendedDatamodelContainer.getIndexName());
            }
            this.adminService.createIndexIfNotExists(extendedDatamodelContainer.getIndexName());
            this.adminService.putMapping(extendedDatamodelContainer.getIndexName(), this.mappingGenerationService.generateMappingForModel(extendedDatamodelContainer.getId()));
            return true;
        } catch (Exception e) {
            this.logger.error(String.format("Failed to recreate index and mapping for model [%s]", extendedDatamodelContainer.getId()));
            return false;
        }
    }

    private String getCompletedOnlineCrawlId(String str, String str2) {
        List<Crawl> findCrawls = this.crawlService.findCrawls(str, str2, CrawlService.CrawlOnlineFlag.Online, CrawlService.CrawlCompleteFlag.Complete, CrawlService.CrawlErrorFlag.NoError, 1);
        if (findCrawls == null || findCrawls.size() <= 0) {
            return null;
        }
        return findCrawls.get(0).getId();
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl
    protected void enqueue(CrawlPipeline crawlPipeline, Crawl crawl) {
        if (crawlPipeline != null) {
            try {
                if (crawl != null) {
                    try {
                        this.statusMapslock.lock();
                        this.crawlIdServiceIdMap.put(crawl.getId(), crawlPipeline.getUuid());
                        this.serviceIdServiceMap.put(crawlPipeline.getUuid(), crawlPipeline);
                        addCrawlIdMapping(this.endpointIdCrawlIdMap, crawl.getEndpointId(), crawl.getId());
                        addCrawlIdMapping(this.datamodelIdCrawlIdMap, crawl.getDatamodelId(), crawl.getId());
                        this.pipelineExecutor.execute(crawlPipeline);
                        this.statusMapslock.unlock();
                    } catch (Exception e) {
                        this.logger.error("Failed to setup processing pipeline", (Throwable) e);
                        error(crawlPipeline.getUuid());
                        this.statusMapslock.unlock();
                    }
                    return;
                }
            } catch (Throwable th) {
                this.statusMapslock.unlock();
                throw th;
            }
        }
        this.logger.warn("Nothing to enqueue - either pipeline or crawl are not set");
    }

    private void addCrawlIdMapping(Map<String, List<String>> map, String str, String str2) {
        List<String> arrayList = map.containsKey(str) ? map.get(str) : new ArrayList();
        arrayList.add(str2);
        map.put(str, arrayList);
    }

    protected void enqueue(CrawlPipeline crawlPipeline) {
        try {
            if (this.pipelineExecutor == null || this.pipelineExecutor.isShutdown() || this.pipelineExecutor.isTerminated()) {
                this.pipelineExecutor = Executors.newFixedThreadPool(getMaxPoolSize());
            }
            this.logger.info(String.format("Running sychronization with client [%s], interrupt timeout set to %s", crawlPipeline.getClass().getSimpleName(), Integer.valueOf(getTimeout())));
            Timer timer = new Timer();
            this.timeoutTimerTask = new TimeoutTimerTask(this.pipelineExecutor.submit(crawlPipeline), this.pipelineExecutor);
            timer.schedule(this.timeoutTimerTask, this.timeout * 1000);
        } catch (Exception e) {
            this.logger.error("An error occurred while executing synchronization", (Throwable) e);
        }
    }

    @Override // org.springframework.beans.factory.DisposableBean
    public void destroy() throws Exception {
        try {
            this.pipelineExecutor.shutdown();
            do {
            } while (!this.pipelineExecutor.isTerminated());
        } catch (Exception e) {
            this.logger.error("Error closing sync executor", (Throwable) e);
        }
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, de.unibamberg.minf.processing.listener.ProcessingListener
    public void start(UUID uuid) {
        super.start(uuid);
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, de.unibamberg.minf.processing.listener.ProcessingListener
    public void updateSize(UUID uuid, long j) {
        super.updateSize(uuid, j);
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, de.unibamberg.minf.processing.listener.ProcessingListener
    public void processed(UUID uuid, long j) {
        super.processed(uuid, j);
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, de.unibamberg.minf.processing.listener.ProcessingListener
    public void finished(UUID uuid) {
        cleanupCompleted(uuid);
        super.finished(uuid);
    }

    @Override // eu.dariah.de.search.crawling.CrawlManagerImpl, de.unibamberg.minf.processing.listener.ProcessingListener
    public void error(UUID uuid) {
        cleanupCompleted(uuid);
        super.error(uuid);
    }

    private void cleanupCompleted(UUID uuid) {
        CrawlPipeline crawlPipeline = this.serviceIdServiceMap.get(uuid);
        if (crawlPipeline == null || crawlPipeline.getCrawlId() == null) {
            this.logger.error("Failed to resolve pipeline for given service ID. Hashed id maps are probably inconsistent.");
            return;
        }
        Crawl findById = this.crawlService.findById(crawlPipeline.getCrawlId());
        if (findById == null) {
            this.logger.error("Failed to resolve crawl for given service ID. Hashed id maps are probably inconsistent.");
            return;
        }
        try {
            try {
                this.statusMapslock.lock();
                removeCrawlId(this.datamodelIdCrawlIdMap, findById.getDatamodelId(), findById.getId());
                removeCrawlId(this.endpointIdCrawlIdMap, findById.getEndpointId(), findById.getId());
                this.statusMapslock.unlock();
            } catch (Exception e) {
                this.logger.error("Failed to process completed service. Hashed id maps are probably inconsistent.");
                this.statusMapslock.unlock();
            }
        } catch (Throwable th) {
            this.statusMapslock.unlock();
            throw th;
        }
    }

    private void removeCrawlId(Map<String, List<String>> map, String str, String str2) {
        if (map.containsKey(str)) {
            List<String> list = map.get(str);
            list.remove(str2);
            if (list.size() == 0) {
                map.remove(str);
            } else {
                map.put(str, list);
            }
        }
    }

    private void setupSyncTimer() {
        this.syncTimer.scheduleAtFixedRate(new TimerTask() { // from class: eu.dariah.de.search.crawling.TimedCrawlManagerImpl.1
            @Override // java.util.TimerTask, java.lang.Runnable
            public void run() {
                try {
                    TimedCrawlManagerImpl.this.setupLock.lock();
                    TimedCrawlManagerImpl.this.lastSyncTimestamp = DateTime.now();
                    TimedCrawlManagerImpl.this.setupLock.unlock();
                    TimedCrawlManagerImpl.this.synchronize();
                } catch (Exception e) {
                    TimedCrawlManagerImpl.this.logger.error("Failed to execute sync task", (Throwable) e);
                }
            }
        }, getSyncInterval() * 1000, getSyncInterval() * 1000);
        this.logger.info(String.format("Scheduled synchronization every %s seconds", Integer.valueOf(getSyncInterval())));
    }
}
