package nl.wur.ssb.prodigal;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Scanner;
import life.gbol.domain.CDS;
import life.gbol.domain.Chromosome;
import life.gbol.domain.Contig;
import life.gbol.domain.Exon;
import life.gbol.domain.ExonList;
import life.gbol.domain.FeatureProvenance;
import life.gbol.domain.Gene;
import life.gbol.domain.NAFeature;
import life.gbol.domain.NASequence;
import life.gbol.domain.Plasmid;
import life.gbol.domain.Protein;
import life.gbol.domain.ProvenanceAnnotation;
import life.gbol.domain.Scaffold;
import life.gbol.domain.Transcript;
import life.gbol.domain.TranscriptFeature;
import life.gbol.domain.mRNA;
import nl.wur.ssb.RDFSimpleCon.ResultLine;
import nl.wur.ssb.RDFSimpleCon.api.Domain;
import nl.wur.ssb.SappGeneric.CommandOptionsGeneric;
import nl.wur.ssb.SappGeneric.ExecCommand;
import nl.wur.ssb.SappGeneric.FASTA;
import nl.wur.ssb.SappGeneric.GBOL.SequenceBuilder;
import nl.wur.ssb.SappGeneric.Generic;
import nl.wur.ssb.SappGeneric.InputOutput.Output;
import org.apache.commons.codec.digest.MessageDigestAlgorithms;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.sparql.engine.optimizer.StatsMatcher;
import org.apache.jena.sparql.sse.Tags;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.purl.ontology.bibo.domain.Document;
import uk.ac.ebi.embl.api.entry.qualifier.Qualifier;

/* loaded from: input_file:nl/wur/ssb/prodigal/Prodigal.class */
public class Prodigal extends SequenceBuilder {
    private static Logger logger = LogManager.getLogger();

    public Prodigal(String[] strArr) throws Exception {
        super(null, "");
        CommandOptionsProdigal commandOptionsProdigal = new CommandOptionsProdigal(strArr);
        this.domain = commandOptionsProdigal.domain;
        CommandOptionsGeneric commandOptionsGeneric = new CommandOptionsGeneric();
        commandOptionsGeneric.input = commandOptionsProdigal.input;
        commandOptionsGeneric.domain = commandOptionsProdigal.domain;
        List<File> create = FASTA.create(commandOptionsGeneric, "genome");
        String prepareBinaryFromJar = Generic.prepareBinaryFromJar("binaries/{OS}/prodigal/2.6.3/prodigal");
        if (commandOptionsProdigal.codon <= 0) {
            logger.info("Obtaining codon information from RDF file");
            commandOptionsProdigal.codon = this.domain.getRDFSimpleCon().runQuerySingleRes("getTranslTable.txt", true, new Object[0]).getLitInt(Tags.tagTable);
        }
        logger.info("Translation table set to " + commandOptionsProdigal.codon);
        for (File file : create) {
            logger.info("Analysing: " + file);
            if (new File(file + "_genecaller.gff").exists()) {
                logger.info(file + "_genecaller.gff exists... skipping gene prediction");
            } else if (commandOptionsProdigal.meta) {
                String[] strArr2 = {prepareBinaryFromJar, "-q", "-f", "gff", "-o", file + "_genecaller.gff", "-a", file + "_proteins.fasta", "-p", StatsMatcher.META, "-g", String.valueOf(commandOptionsProdigal.codon), "-d", file + "_nucleotide.fasta", "-i", file.getAbsolutePath()};
                logger.info(StringUtils.join(strArr2, " "));
                new ExecCommand(strArr2);
            } else {
                String[] strArr3 = {prepareBinaryFromJar, "-q", "-f", "gff", "-o", file + "_genecaller.gff", "-a", file + "_proteins.fasta", "-p", "single", "-g", String.valueOf(commandOptionsProdigal.codon), "-t", file + "_genecaller.trn", "-d", file + "_nucleotide.fasta", "-i", file.getAbsolutePath()};
                logger.info(StringUtils.join(strArr3, " "));
                new ExecCommand(strArr3);
                String[] strArr4 = {prepareBinaryFromJar, "-q", "-f", "gff", "-o", file + "_genecaller.gff", "-a", file + "_proteins.fasta", "-p", "single", "-g", String.valueOf(commandOptionsProdigal.codon), "-t", file + "_genecaller.trn", "-d", file + "_nucleotide.fasta", "-i", file.getAbsolutePath()};
                logger.info(StringUtils.join(strArr4, " "));
                new ExecCommand(strArr4);
            }
            geneParser(commandOptionsProdigal, file);
        }
        Output.save(this.domain, commandOptionsProdigal.output);
    }

    public void geneParser(CommandOptionsProdigal commandOptionsProdigal, File file) throws Exception {
        long j;
        long j2;
        boolean z;
        NASequence nASequence;
        HashMap hashMap = new HashMap();
        HashMap hashMap2 = new HashMap();
        HashMap hashMap3 = new HashMap();
        String str = file.getName().split("__#__")[0];
        String name = commandOptionsProdigal.input.getName();
        logger.info("Building gff dictionary");
        Scanner scanner = new Scanner(new FileReader(file + "_genecaller.gff"));
        while (scanner.hasNextLine()) {
            String nextLine = scanner.nextLine();
            if (!nextLine.startsWith("#")) {
                hashMap2.put(nextLine.split("ID=")[1].split(";")[0], nextLine);
            }
        }
        if (!commandOptionsProdigal.debug) {
            new File(name + "_nucleotide.fasta").deleteOnExit();
        }
        Scanner scanner2 = new Scanner(new FileReader(file + "_nucleotide.fasta"));
        logger.info("Building gene dictionary");
        String str2 = null;
        while (scanner2.hasNext()) {
            String nextLine2 = scanner2.nextLine();
            if (nextLine2.startsWith(">")) {
                str2 = nextLine2;
                hashMap.put(str2, "");
            } else {
                hashMap.put(str2, ((String) hashMap.get(str2)) + nextLine2);
            }
        }
        scanner2.close();
        Scanner scanner3 = new Scanner(new FileReader(file + "_proteins.fasta"));
        if (!commandOptionsProdigal.debug) {
            new File(name + "_proteins.fasta").deleteOnExit();
        }
        logger.info("Building protein dictionary");
        while (scanner3.hasNext()) {
            String nextLine3 = scanner3.nextLine();
            if (nextLine3.startsWith(">")) {
                str2 = nextLine3;
                hashMap3.put(str2, "");
            } else {
                hashMap3.put(str2, ((String) hashMap3.get(str2)) + nextLine3);
            }
        }
        scanner3.close();
        HashMap hashMap4 = new HashMap();
        for (ResultLine resultLine : this.domain.getRDFSimpleCon().runQuery("getContigTypes.txt", true, new Object[0])) {
            hashMap4.put(resultLine.getIRI("seqobject"), resultLine.getIRI("type"));
        }
        HashMap hashMap5 = new HashMap();
        for (ResultLine resultLine2 : this.domain.getRDFSimpleCon().runQuery("getAllLocusTag.txt", true, new Object[0])) {
            hashMap5.put(resultLine2.getIRI("gene"), resultLine2.getLitString("locus"));
        }
        logger.info("Parsing gene dictionary");
        int i = 0;
        int i2 = 1;
        for (String str3 : hashMap.keySet()) {
            i++;
            if (i % 100 == 0) {
                System.out.print("Parsed " + i + " genes of " + hashMap.size() + " database size " + this.domain.getRDFSimpleCon().getModel().size() + "\r");
            }
            String[] split = str3.split(" ");
            long parseInt = Integer.parseInt(split[2]);
            long parseInt2 = Integer.parseInt(split[4]);
            if (parseInt < parseInt2) {
                j = parseInt;
                j2 = parseInt2;
            } else {
                j = parseInt2;
                j2 = parseInt;
            }
            String str4 = split[6];
            if (str4.matches("-1")) {
                z = true;
            } else {
                if (!str4.matches("1")) {
                    throw new Exception("Formatting of strand is wrong");
                }
                z = false;
            }
            String str5 = split[8];
            String str6 = ((String) hashMap2.get(str5.split(";")[0].split("=")[1])).split("\t")[8];
            String str7 = str6.split(";")[6].split("=")[1];
            String str8 = str6.split(";")[7].split("=")[1];
            String str9 = str6.split(";")[8].split("=")[1];
            String str10 = str6.split(";")[9].split("=")[1];
            String str11 = str6.split(";")[10].split("=")[1];
            String str12 = str6.split(";")[11].split("=")[1];
            String str13 = str6.split(";")[12].split("=")[1];
            String str14 = str5.split(";")[1].split("=")[1];
            boolean z2 = str14.startsWith("1");
            boolean z3 = str14.endsWith("1");
            String str15 = str5.split(";")[2].split("=")[1];
            String str16 = str5.split(";")[3].split("=")[1];
            String str17 = str5.split(";")[4].split("=")[1];
            String str18 = str5.split(";")[5].split("=")[1];
            String replaceAll = ((String) hashMap3.get(str3)).replaceAll("\\*$", "");
            String str19 = (String) hashMap.get(str3);
            String checksum = Generic.checksum(str19, MessageDigestAlgorithms.SHA_384);
            String checksum2 = Generic.checksum(replaceAll, MessageDigestAlgorithms.SHA_384);
            Protein protein = (Protein) this.domain.make(Protein.class, "http://gbol.life/0.1/protein/" + checksum2);
            protein.setSha384(checksum2);
            protein.setSequence(replaceAll);
            String replaceAll2 = str3.split(" ")[0].replace(">", "").replaceAll("_[0-9]+$", "");
            String str20 = (String) hashMap4.get(replaceAll2);
            if (str20 == null) {
                nASequence = (NASequence) this.domain.make(Contig.class, replaceAll2);
                logger.debug("Unknown genome format detected... Using contig instead");
            } else if (str20.toLowerCase().contains("scaffold")) {
                nASequence = (NASequence) this.domain.make(Scaffold.class, replaceAll2);
            } else if (str20.toLowerCase().contains("contig")) {
                nASequence = (NASequence) this.domain.make(Contig.class, replaceAll2);
            } else if (str20.toLowerCase().contains(Qualifier.CHROMOSOME_QUALIFIER_NAME)) {
                nASequence = (NASequence) this.domain.make(Chromosome.class, replaceAll2);
            } else if (str20.toLowerCase().contains(Qualifier.PLASMID_QUALIFIER_NAME)) {
                nASequence = (NASequence) this.domain.make(Plasmid.class, replaceAll2);
            } else {
                nASequence = (NASequence) this.domain.make(Contig.class, replaceAll2);
                logger.debug("Unknown genome format detected... Using contig instead");
            }
            this.featureURI = nASequence.getResource().getURI();
            Domain domain = this.domain;
            Gene gene = (Gene) domain.make(Gene.class, nASequence.getResource().getURI() + "/gene/" + j + "-" + domain);
            gene.setLocation(makeRegion(j, j2, z, false, z2, z3, gene));
            Domain domain2 = this.domain;
            String uri = gene.getResource().getURI();
            Objects.requireNonNull(commandOptionsProdigal);
            Objects.requireNonNull(commandOptionsProdigal);
            FeatureProvenance featureProvenance = (FeatureProvenance) domain2.make(FeatureProvenance.class, uri + "/" + "prodigal" + "/" + "2.6.3");
            Domain domain3 = this.domain;
            String uri2 = gene.getResource().getURI();
            Objects.requireNonNull(commandOptionsProdigal);
            Objects.requireNonNull(commandOptionsProdigal);
            ProvenanceAnnotation provenanceAnnotation = (ProvenanceAnnotation) domain3.make(ProvenanceAnnotation.class, uri2 + "/" + "prodigal" + "/" + "2.6.3" + "/prov");
            Resource resource = provenanceAnnotation.getResource();
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/gc_cont"), str18);
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/rbs_motif"), str16);
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/rbs_spacer"), str17);
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/start_type"), str15);
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/score"), Float.valueOf(str8));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/conf"), Float.valueOf(str7));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/cscore"), Float.valueOf(str9));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/rscore"), Float.valueOf(str11));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/sscore"), Float.valueOf(str10));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/tscore"), Float.valueOf(str13));
            resource.addLiteral(ResourceFactory.createProperty("http://gbol.life/0.1/uscore"), Float.valueOf(str12));
            provenanceAnnotation.setReference((Document) this.domain.make(Document.class, "http://doi.org"));
            featureProvenance.setAnnotation(provenanceAnnotation);
            this.domain.disableCheck();
            featureProvenance.setOrigin(commandOptionsProdigal.annotResult);
            this.domain.enableCheck();
            gene.addProvenance(featureProvenance);
            if (hashMap5.containsKey(gene.getResource().getURI())) {
                gene.setLocusTag((String) hashMap5.get(gene.getResource().getURI()));
            } else {
                gene.setLocusTag(str + "_" + i2);
                i2 = i2 == 1 ? 5 : i2 + 5;
            }
            nASequence.addFeature((NAFeature) gene);
            Domain domain4 = this.domain;
            Transcript transcript = (mRNA) domain4.make(mRNA.class, nASequence.getResource().getURI() + "/mrna/" + j + "-" + domain4);
            Domain domain5 = this.domain;
            Exon exon = (Exon) domain5.make(Exon.class, nASequence.getResource().getURI() + "/exon/" + j + "-" + domain5);
            exon.setLocation(makeRegion(j, j2, z, false, z2, z3, exon));
            exon.addProvenance(featureProvenance);
            Domain domain6 = this.domain;
            ExonList exonList = (ExonList) domain6.make(ExonList.class, nASequence.getResource().getURI() + "/exonlist/" + j + "-" + domain6);
            exonList.addExon(exon);
            Domain domain7 = this.domain;
            CDS cds = (CDS) domain7.make(CDS.class, nASequence.getResource().getURI() + "/cds/" + j + "-" + domain7);
            cds.setLocation(makeRegion(1L, str19.length(), "cds", cds));
            protein.setLength(Long.valueOf(replaceAll.length()));
            cds.setProtein(protein);
            cds.addProvenance(featureProvenance);
            transcript.addFeature((TranscriptFeature) cds);
            transcript.setSha384(checksum);
            transcript.setExonList(exonList);
            transcript.setSequence(str19);
            transcript.setLength(Long.valueOf(str19.length()));
            gene.addExon(exon);
            gene.addTranscript(transcript);
            nASequence.setTranslTable(Integer.valueOf(commandOptionsProdigal.codon));
        }
        Output.save(this.domain, commandOptionsProdigal.output);
        if (commandOptionsProdigal.debug) {
            return;
        }
        new File(file + "_proteins.fasta").deleteOnExit();
        new File(file + "_nucleotide.fasta").deleteOnExit();
        new File(file + "_genecaller.gff").deleteOnExit();
        file.deleteOnExit();
    }
}
