package nl.wur.ssb.diamond;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import life.gbol.domain.AnnotationResult;
import life.gbol.domain.FeatureProvenance;
import life.gbol.domain.Protein;
import life.gbol.domain.ProteinFeature;
import life.gbol.domain.ProteinHomology;
import life.gbol.domain.ProvenanceAnnotation;
import nl.wur.ssb.RDFSimpleCon.ExecCommand;
import nl.wur.ssb.RDFSimpleCon.api.Domain;
import nl.wur.ssb.SappGeneric.FASTA;
import nl.wur.ssb.SappGeneric.GBOL.SequenceBuilder;
import nl.wur.ssb.SappGeneric.Generic;
import nl.wur.ssb.SappGeneric.InputOutput.Output;
import nl.wur.ssb.conversion.options.CommandOptionsFormatConversion;
import nl.wur.ssb.conversion.rdfconversion.Conversion;
import org.apache.commons.codec.digest.MessageDigestAlgorithms;
import org.apache.jena.atlas.lib.Chars;
import org.apache.jena.ext.com.google.common.io.Files;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.log4j.Logger;
import org.purl.ontology.bibo.domain.Document;
import uk.ac.ebi.embl.api.entry.Entry;

/* loaded from: input_file:nl/wur/ssb/diamond/Diamond.class */
/**
 * Runs the external DIAMOND binary in {@code blastp} mode on protein FASTA files and
 * records every reported alignment as a {@link ProteinHomology} feature in the domain.
 *
 * <p>Work flow, as implemented by the constructor:
 * <ol>
 *   <li>protein FASTA files are created from the input via {@code FASTA.create};</li>
 *   <li>{@link #blast(List)} runs DIAMOND per FASTA file, parses the tabular result and
 *       writes the resulting triples to one or more {@code .nt} part files, which are then
 *       concatenated into {@code <output>.nt};</li>
 *   <li>{@code <output>.nt} is converted to the final output file and removed.</li>
 * </ol>
 *
 * <p>NOTE(review): {@link #options} is a mutable static that is reassigned by every
 * constructor call, so two instances in the same JVM share (and overwrite) their
 * configuration.
 */
public class Diamond extends SequenceBuilder {
    private static final Logger logger = Logger.getLogger(Diamond.class);

    /**
     * Number of tab-separated columns requested through {@code --outfmt 6 ...} in
     * {@link #blast(List)}: qseqid, salltitles, pident, length, mismatch, gaps, qstart,
     * qend, sstart, send, evalue, bitscore, full_sseq.
     */
    private static final int EXPECTED_COLUMNS = 13;

    /** Model size (in triples) above which the domain is flushed to an intermediate file. */
    private static final long SYNC_TRIPLE_THRESHOLD = 100000L;

    /** The triple count is only inspected once every this many result lines. */
    private static final int SYNC_CHECK_INTERVAL = 1000;

    static CommandOptions options;

    /**
     * Parses the command line, runs the complete annotation and writes the output file.
     *
     * @param strArr command line arguments, handed to {@code CommandOptions}
     * @throws Exception if no protein sequences are found, or if the DIAMOND run, the
     *                   result parsing or the format conversion fails
     */
    public Diamond(String[] strArr) throws Exception {
        super(null, "http://gbol.life/0.1/");
        options = new CommandOptions(strArr);
        this.domain = options.domain;
        List<File> create = FASTA.create(options, Entry.PROTEIN);
        if (create.size() <= 0) {
            throw new Exception("No protein sequences found");
        }
        blast(create);
        Conversion.app(new CommandOptionsFormatConversion(new String[]{"-convert", "-i", options.output + ".nt", "-o", options.output.getAbsolutePath()}));
        File mergedTriples = new File(options.output + ".nt");
        // The merged N-Triples file is only an intermediate; report (but do not fail on) a
        // delete that did not succeed instead of silently ignoring the result.
        if (!mergedTriples.delete()) {
            logger.warn("Could not delete intermediate file: " + mergedTriples);
        }
    }

    /**
     * Runs DIAMOND {@code blastp} for every FASTA file, parses each result and concatenates
     * all part files produced by {@link #bestHitParser()} into {@code <output>.nt}.
     *
     * <p>When {@code options.resultFile} is not set, {@code <fasta>.diamond.blast} is used.
     * The field is reset to {@code null} after every file, so a preset value only applies to
     * the first FASTA file.
     *
     * @param list protein FASTA files to use as DIAMOND queries
     * @throws Exception if DIAMOND exits with an error, a result cannot be parsed, or a
     *                   file cannot be read or written
     */
    public void blast(List<File> list) throws Exception {
        logger.info("Starting blast");
        HashSet<File> partFiles = new HashSet<>();
        for (File file : list) {
            logger.debug("Database: " + options.db);
            if (options.resultFile == null) {
                options.resultFile = new File(file + ".diamond.blast");
            }
            // NOTE(review): the command is assembled by plain string concatenation; how
            // ExecCommand tokenises it is not visible here, so paths containing spaces or
            // shell metacharacters may break or alter the command — confirm and prefer an
            // argument list if ExecCommand supports one.
            String str = options.binary + " blastp --outfmt 6 qseqid salltitles pident length mismatch gaps qstart qend sstart send evalue bitscore full_sseq --no-auto-append -d " + options.db + " -k 25 -q " + file + " -o " + options.resultFile + " -t ./";
            logger.debug("Executing: " + str);
            Execute(str);
            partFiles.addAll(bestHitParser());
            options.resultFile = null;
        }
        this.domain.closeAndDelete();

        // Concatenate all part files. try-with-resources guarantees that both streams are
        // closed even when a read or write fails, and the read loop ends at end-of-file.
        try (FileOutputStream merged = new FileOutputStream(new File(options.output + ".nt"))) {
            byte[] buffer = new byte[8192];
            for (File part : partFiles) {
                try (FileInputStream in = new FileInputStream(part)) {
                    int read;
                    while ((read = in.read(buffer)) != -1) {
                        merged.write(buffer, 0, read);
                    }
                }
            }
        }
    }

    /**
     * Parses the tabular DIAMOND result in {@code options.resultFile} and stores every line
     * as a homology feature. Despite its name, this method processes every line of the
     * file; it performs no best-hit filtering itself.
     *
     * <p>Every {@value #SYNC_CHECK_INTERVAL} lines the model size is checked; when it
     * exceeds {@value #SYNC_TRIPLE_THRESHOLD} triples the domain is saved to a part file and
     * replaced by a fresh one. The remaining content is saved to a last part file when the
     * end of the result is reached.
     *
     * @return the part files written while parsing; always contains at least one file
     * @throws IllegalArgumentException if a non-blank line has fewer than
     *                                  {@value #EXPECTED_COLUMNS} columns or a numeric
     *                                  column cannot be parsed
     * @throws Exception if the result file cannot be read or a part file cannot be saved
     */
    public HashSet<File> bestHitParser() throws Exception {
        logger.debug("Reading: " + options.resultFile);
        HashSet<File> partFiles = new HashSet<>();
        int lineCount = 0;
        try (Scanner scanner = new Scanner(options.resultFile)) {
            while (scanner.hasNextLine()) {
                lineCount++;
                if (lineCount % SYNC_CHECK_INTERVAL == 0) {
                    logger.info("Parsed " + lineCount + " lines");
                    if (this.domain.getRDFSimpleCon().getModel().size() > SYNC_TRIPLE_THRESHOLD) {
                        // The current line has not been processed yet, so the part file
                        // covers lineCount - 1 lines. Using that number also keeps the name
                        // distinct from the final part file written below; with identical
                        // names the final save would overwrite this part when the sync
                        // happens on the last line of the result.
                        partFiles.add(syncIntermediateResults(lineCount - 1));
                    }
                }
                String line = scanner.nextLine();
                if (line.trim().isEmpty()) {
                    // Tolerate blank lines instead of failing on a missing column.
                    continue;
                }
                String[] columns = line.split("\t");
                if (columns.length < EXPECTED_COLUMNS) {
                    throw new IllegalArgumentException("Line " + lineCount + " of " + options.resultFile
                            + " has " + columns.length + " columns, expected " + EXPECTED_COLUMNS);
                }
                addHit(columns);
            }
        }
        File lastPart = new File(options.resultFile + "_" + lineCount + ".nt");
        partFiles.add(lastPart);
        Output.save(options.domain, lastPart);
        logger.info("Parsed " + lineCount + " lines");
        options.domain = this.domain;
        return partFiles;
    }

    /**
     * Saves the current domain to a part file and continues with a new, empty domain backed
     * by a fresh temporary directory. The annotation result resource is re-created in the
     * new domain so that subsequent provenance can still refer to it.
     *
     * @param linesCovered number of result lines processed so far; used in the file name
     * @return the part file that was written
     * @throws Exception if saving or re-creating the domain fails
     */
    private File syncIntermediateResults(int linesCovered) throws Exception {
        File partFile = new File(options.resultFile + "_" + linesCovered + ".nt");
        long size = this.domain.getRDFSimpleCon().getModel().size();
        logger.info("Syncing intermediate results (" + size + " triples) to: " + partFile);
        options.domain = this.domain;
        Output.save(options.domain, partFile);
        this.domain.closeAndDelete();
        this.domain = new Domain("file://" + Files.createTempDir().getAbsolutePath());
        options.domain = this.domain;
        options.annotResult = (AnnotationResult) this.domain.make(AnnotationResult.class, options.annotResultIRI);
        return partFile;
    }

    /**
     * Stores one DIAMOND result line as a {@link ProteinHomology} feature on the query
     * protein, including provenance, alignment statistics and the subject protein.
     *
     * @param columns the tab-separated fields of one result line, in the order requested by
     *                {@link #blast(List)}; must hold at least {@value #EXPECTED_COLUMNS}
     *                entries
     * @throws Exception if a domain object cannot be created or the checksum fails
     */
    private void addHit(String[] columns) throws Exception {
        String queryIri = columns[0].trim();
        // salltitles: the first space-separated token is used as identifier, the remainder
        // (if any) as description. A single limited split also copes with a title that
        // consists of separators only.
        String[] title = columns[1].split(" ", 2);
        String subjectId = title[0];
        String subjectDescription = title.length > 1 ? title[1] : "";
        float percIdentity = Float.parseFloat(columns[2].trim());
        int alignmentLength = Integer.parseInt(columns[3].trim());
        int mismatches = Integer.parseInt(columns[4].trim());
        int gaps = Integer.parseInt(columns[5].trim());
        int queryStart = Integer.parseInt(columns[6].trim());
        int queryEnd = Integer.parseInt(columns[7].trim());
        int subjectStart = Integer.parseInt(columns[8].trim());
        int subjectEnd = Integer.parseInt(columns[9].trim());
        double evalue = Double.parseDouble(columns[10].trim());
        float bitscore = Float.parseFloat(columns[11].trim());
        String subjectSequence = columns[12];

        Protein query = (Protein) this.domain.make(Protein.class, queryIri);
        String homologyIri = query.getResource().getURI() + "/blast/"
                + subjectId.replace("http://gbol.life/0.1/protein/", "").replace(Chars.S_VBAR, "_")
                + "/" + queryStart + "-" + queryEnd + "/" + subjectStart + "-" + subjectEnd;
        ProteinHomology homology = (ProteinHomology) this.domain.make(ProteinHomology.class, homologyIri);
        this.featureURI = homology.getResource().getURI().replaceAll("/$", "") + "/";
        homology.setTargetRegion(makeRegion(subjectStart, subjectEnd, "region", homology));
        homology.setLocation(makeRegion(queryStart, queryEnd, "region", homology));

        FeatureProvenance featureProvenance = (FeatureProvenance) this.domain.make(FeatureProvenance.class, homology.getResource().getURI() + "/feature/prov");
        // Checks are switched off only around setOrigin, as in the original code.
        // NOTE(review): presumably because the annotation result is not fully populated at
        // this point — confirm.
        this.domain.disableCheck();
        featureProvenance.setOrigin(options.annotResult);
        this.domain.enableCheck();
        homology.addProvenance(featureProvenance);

        ProvenanceAnnotation provenanceAnnotation = (ProvenanceAnnotation) this.domain.make(ProvenanceAnnotation.class, homology.getResource().getURI() + "/blast/prov");
        Resource statistics = provenanceAnnotation.getResource();
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/perc_identity"), percIdentity);
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/alignment_length"), alignmentLength);
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/mismatches"), mismatches);
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/gaps"), gaps);
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/evalue"), evalue);
        statistics.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/bitscore"), bitscore);

        Document document = (Document) this.domain.make(Document.class, "https://doi.org/10.1038/nmeth.3176");
        document.setAbstract("The alignment of sequencing reads against a protein reference database is a major computational bottleneck in metagenomics and data-intensive evolutionary projects. Although recent tools offer improved performance over the gold standard BLASTX, they exhibit only a modest speedup or low sensitivity. We introduce DIAMOND, an open-source algorithm based on double indexing that is 20,000 times faster than BLASTX on short reads and has a similar degree of sensitivity.");
        document.setDateAccepted(LocalDateTime.parse("2015-01-01T00:00:01"));
        document.setTitle("Fast and sensitive protein alignment using DIAMOND");
        provenanceAnnotation.setReference(document);
        featureProvenance.setAnnotation(provenanceAnnotation);

        // The subject protein is identified by the SHA-384 checksum of its sequence.
        Protein subject = (Protein) this.domain.make(Protein.class, "http://gbol.life/0.1/protein/" + Generic.checksum(subjectSequence, MessageDigestAlgorithms.SHA_384));
        subject.setSequence(subjectSequence);
        subject.addAccession(subjectId);
        homology.setHomologousTo(subject.getResource().getURI());
        homology.setHomologousToDesc(subjectDescription);
        query.addFeature((ProteinFeature) homology);
    }

    /**
     * Runs an external command and logs its standard output.
     *
     * @param str the complete command line
     * @throws Exception if the command finishes with an exit code greater than zero
     */
    private void Execute(String str) throws Exception {
        logger.info(str);
        ExecCommand execCommand = new ExecCommand(str);
        logger.info("Stdout: " + execCommand.getOutput());
        if (execCommand.getExit() > 0) {
            logger.error("Stderr: " + execCommand.getError());
            logger.info("Exit code: " + execCommand.getExit());
            throw new Exception("Execution failed");
        }
    }
}
