package nl.wur.ssb.gff3;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import nl.wur.ssb.RDFSimpleCon.api.Domain;
import nl.wur.ssb.SappGeneric.Generic;
import nl.wur.ssb.SappGeneric.InputOutput.Output;
import nl.wur.ssb.conversion.App;
import nl.wur.ssb.conversion.fasta2rdf.Fasta;
import nl.wur.ssb.conversion.options.CommandOptionsGFF3;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.WordUtils;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.lang.TriX;
import org.apache.jena.sparql.sse.Tags;
import org.apache.jena.sparql.vocabulary.FOAF;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.VOID;
import org.apache.log4j.Logger;
import uk.ac.ebi.embl.api.entry.Entry;
import uk.ac.ebi.embl.api.entry.EntryFactory;
import uk.ac.ebi.embl.api.entry.feature.Feature;
import uk.ac.ebi.embl.api.entry.feature.FeatureFactory;
import uk.ac.ebi.embl.api.entry.location.Join;
import uk.ac.ebi.embl.api.entry.location.LocalRange;
import uk.ac.ebi.embl.api.entry.location.Location;
import uk.ac.ebi.embl.api.entry.location.LocationFactory;
import uk.ac.ebi.embl.api.entry.qualifier.Qualifier;
import uk.ac.ebi.embl.api.entry.qualifier.QualifierFactory;
import uk.ac.ebi.embl.api.entry.sequence.SequenceFactory;
import uk.ac.ebi.embl.api.gff3.GFF3Record;
import uk.ac.ebi.embl.api.gff3.GFF3RecordSet;
import uk.ac.ebi.embl.flatfile.writer.embl.EmblEntryWriter;
import uk.ac.ebi.embl.gff3.reader.GFF3FlatFileEntryReader;

/* loaded from: input_file:nl/wur/ssb/gff3/GFF3Mapper.class */
/**
 * Drives the GFF3 (+ FASTA) conversion: the GFF3 input is split into temporary per-sequence
 * files, every split file is mapped onto EMBL {@link Entry} objects and written as an
 * intermediate EMBL flat file, each EMBL file is handed to {@link App#main(String[])} with the
 * {@code -embl2rdf} option, and the resulting {@code .ttl} files are loaded into one
 * {@link Domain} that is saved to {@code arguments.output}.
 *
 * <p>All work is triggered from the constructor; {@link #mapGFF3ToEntry(GFF3RecordSet)} is the
 * only other public entry point.
 */
public class GFF3Mapper {
    /** Size (in bytes) a split file must exceed before a change of sequence ID starts a new file. */
    private static final long SPLIT_THRESHOLD_BYTES = 1048576L;

    /** Replaced in the constructor by the logger obtained from {@code Generic.Logger(debug)}. */
    private static Logger logger = Logger.getLogger(GFF3Mapper.class);

    /** Parsed command line options; assigned by the constructor. */
    public static CommandOptionsGFF3 arguments;

    /** Domain opened on the output location right after the FASTA step; queried for sequences. */
    private Domain fastaDomain;

    /** Classes whose declared {@code handle*} methods define which GFF3 attributes are accepted. */
    private final String[] gbolClasses = {
        "nl.wur.ssb.gbolclasses.features.ArtificialRecognizedRegion",
        "nl.wur.ssb.gbolclasses.features.AssemblyAnnotation",
        "nl.wur.ssb.gbolclasses.features.AssemblyGap",
        "nl.wur.ssb.gbolclasses.features.BiologicalRecognizedRegion",
        "nl.wur.ssb.gbolclasses.features.CDS",
        "nl.wur.ssb.gbolclasses.features.Centromere",
        "nl.wur.ssb.gbolclasses.features.CRISPRCassette",
        "nl.wur.ssb.gbolclasses.features.DLoop",
        "nl.wur.ssb.gbolclasses.features.Exon",
        "nl.wur.ssb.gbolclasses.features.Feature",
        "nl.wur.ssb.gbolclasses.features.FivePrimeUTR",
        "nl.wur.ssb.gbolclasses.features.Gene",
        "nl.wur.ssb.gbolclasses.features.GeneralFeature",
        "nl.wur.ssb.gbolclasses.features.GenomicFeature",
        "nl.wur.ssb.gbolclasses.features.Homology",
        "nl.wur.ssb.gbolclasses.features.ImmunoglobulinFeature",
        "nl.wur.ssb.gbolclasses.features.IntegratedVirus",
        "nl.wur.ssb.gbolclasses.features.Intron",
        "nl.wur.ssb.gbolclasses.features.MaturePeptide",
        "nl.wur.ssb.gbolclasses.features.MiscBinding",
        "nl.wur.ssb.gbolclasses.features.MiscFeature",
        "nl.wur.ssb.gbolclasses.features.MiscRecomb",
        "nl.wur.ssb.gbolclasses.features.MiscStructure",
        "nl.wur.ssb.gbolclasses.features.MiscVariation",
        "nl.wur.ssb.gbolclasses.features.MobileElement",
        "nl.wur.ssb.gbolclasses.features.ModifiedBase",
        "nl.wur.ssb.gbolclasses.features.ModifiedResidue",
        "nl.wur.ssb.gbolclasses.features.NAFeature",
        "nl.wur.ssb.gbolclasses.features.NaturalVariation",
        "nl.wur.ssb.gbolclasses.features.Operon",
        "nl.wur.ssb.gbolclasses.features.PolyASite",
        "nl.wur.ssb.gbolclasses.features.PrimerBinding",
        "nl.wur.ssb.gbolclasses.features.ProteinBinding",
        "nl.wur.ssb.gbolclasses.features.ProteinFeature",
        "nl.wur.ssb.gbolclasses.features.ProteinHomology",
        "nl.wur.ssb.gbolclasses.features.ProteinRepeat",
        "nl.wur.ssb.gbolclasses.features.ProteinStructure",
        "nl.wur.ssb.gbolclasses.features.RecognizedRegion",
        "nl.wur.ssb.gbolclasses.features.RegulationSite",
        "nl.wur.ssb.gbolclasses.features.RepeatFeature",
        "nl.wur.ssb.gbolclasses.features.RepeatRegion",
        "nl.wur.ssb.gbolclasses.features.ReplicationOrigin",
        "nl.wur.ssb.gbolclasses.features.SequenceAnnotation",
        "nl.wur.ssb.gbolclasses.features.SequenceTaggedSite",
        "nl.wur.ssb.gbolclasses.features.SignalPeptide",
        "nl.wur.ssb.gbolclasses.features.Source",
        "nl.wur.ssb.gbolclasses.features.StemLoop",
        "nl.wur.ssb.gbolclasses.features.StructureFeature",
        "nl.wur.ssb.gbolclasses.features.Telomere",
        "nl.wur.ssb.gbolclasses.features.ThreePrimeUTR",
        "nl.wur.ssb.gbolclasses.features.TranscriptFeature",
        "nl.wur.ssb.gbolclasses.features.TranscriptionElement",
        "nl.wur.ssb.gbolclasses.features.TransferOrigin",
        "nl.wur.ssb.gbolclasses.features.TransMembraneRegion",
        "nl.wur.ssb.gbolclasses.features.UnsureBases",
        "nl.wur.ssb.gbolclasses.features.UpdatedSequence",
        "nl.wur.ssb.gbolclasses.features.VariationFeature",
        "nl.wur.ssb.gbolclasses.Sample",
        "nl.wur.ssb.gbolclasses.sequences.Chromosome",
        "nl.wur.ssb.gbolclasses.sequences.CompleteNASequence",
        "nl.wur.ssb.gbolclasses.sequences.Contig",
        "nl.wur.ssb.gbolclasses.sequences.MaturedRNA",
        "nl.wur.ssb.gbolclasses.sequences.MiscRna",
        "nl.wur.ssb.gbolclasses.sequences.mRNA",
        "nl.wur.ssb.gbolclasses.sequences.NASequence",
        "nl.wur.ssb.gbolclasses.sequences.ncRNA",
        "nl.wur.ssb.gbolclasses.sequences.Plasmid",
        "nl.wur.ssb.gbolclasses.sequences.PrecursorRNA",
        "nl.wur.ssb.gbolclasses.sequences.Read",
        "nl.wur.ssb.gbolclasses.sequences.rRNA",
        "nl.wur.ssb.gbolclasses.sequences.Scaffold",
        "nl.wur.ssb.gbolclasses.sequences.Sequence",
        "nl.wur.ssb.gbolclasses.sequences.tmRNA",
        "nl.wur.ssb.gbolclasses.sequences.Transcript",
        "nl.wur.ssb.gbolclasses.sequences.tRNA",
        "nl.wur.ssb.gbolclasses.sequences.UncompleteNASequence",
        "nl.wur.ssb.gbolclasses.Thing"
    };

    /** Names of all {@code handle*} methods found on {@link #gbolClasses}. */
    private final Set<String> allowed = new HashSet<>();

    /** Normalised attribute names for which no {@code handle*} method exists. */
    private final Set<String> missed = new HashSet<>();

    private final EntryFactory entryFactory = new EntryFactory();
    private final FeatureFactory featureFactory = new FeatureFactory();
    QualifierFactory qualifierFactory = new QualifierFactory();
    private final LocationFactory locationFactory = new LocationFactory();
    String resourceBundle = "uk.ac.ebi.embl.gff3.mapping.gffMapper";

    /**
     * Parses the command line and runs the complete conversion.
     *
     * @param strArr command line arguments understood by {@link CommandOptionsGFF3}
     * @throws Exception if option parsing, any I/O step, or one of the delegated converters
     *     fails, or if the GFF3 input contains no feature lines
     */
    public GFF3Mapper(String[] strArr) throws Exception {
        arguments = new CommandOptionsGFF3(strArr);
        logger = Generic.Logger(arguments.debug);
        logger.debug("Debug option enabled");
        getHandles();

        logger.info("Parsing FASTA");
        String[] fastaArgs = {
            "-id", arguments.identifier,
            "-genome",
            "-contig",
            "-i", arguments.fasta.getAbsolutePath(),
            "-o", arguments.output.getAbsolutePath(),
            "-codon", String.valueOf(arguments.codon)
        };
        Fasta.app(fastaArgs);
        this.fastaDomain = new Domain("file://" + arguments.output);
        logger.info("Loaded " + this.fastaDomain.getRDFSimpleCon().getModel().size() + " triples ");

        logger.info("Parsing GFF");
        Set<String> splitFileNames = splitGffBySequence(arguments.input.get(0));
        logger.info("Splitting GFF completed");

        Set<File> emblFiles = new HashSet<>();
        for (String splitFileName : splitFileNames) {
            emblFiles.add(writeIntermediateEmbl(splitFileName, splitFileNames.size()));
        }

        for (File emblFile : emblFiles) {
            convertEmblToRdf(emblFile);
        }

        File mergeDirectory = new File(arguments.output + ".dir");
        mergeAndSave(emblFiles, mergeDirectory);

        // NOTE(review): the EMBL files are removed here even when debug/gff2embl is set, which
        // makes the deleteOnExit guard in writeIntermediateEmbl moot - confirm the intent.
        // NOTE(review): the *.tmp.gff split files are not removed anywhere - confirm the intent.
        for (File emblFile : emblFiles) {
            deleteIfPresent(emblFile);
            deleteIfPresent(new File(emblFile + ".ttl"));
        }
        FileUtils.deleteDirectory(mergeDirectory);
    }

    /**
     * Opens the GFF3 input as UTF-8 text, transparently gunzipping paths that end in {@code .gz}.
     */
    private static BufferedReader openGffReader(String gffPath) throws Exception {
        FileInputStream raw = new FileInputStream(new File(gffPath));
        try {
            if (gffPath.endsWith(".gz")) {
                return new BufferedReader(new InputStreamReader(new GZIPInputStream(raw), "UTF-8"));
            }
            return new BufferedReader(new InputStreamReader(raw, "UTF-8"));
        } catch (Exception e) {
            // Do not leak the file handle when the gzip header or the charset is rejected.
            raw.close();
            throw e;
        }
    }

    /**
     * Copies the feature lines of the GFF3 input into {@code <sequenceId>.tmp.gff} files in the
     * working directory. Comment lines, blank lines and lines mentioning
     * {@code transcription_start_site} / {@code transcription_end_site} are dropped. A new file
     * is started only when the sequence ID changes and the current file already exceeds
     * {@link #SPLIT_THRESHOLD_BYTES}.
     *
     * @return the names of the split files that were written
     * @throws Exception on I/O failure or when the input holds no feature line at all
     */
    private Set<String> splitGffBySequence(String gffPath) throws Exception {
        Set<String> splitFileNames = new HashSet<>();
        String previousId = null;
        File currentFile = null;
        PrintWriter writer = null;
        try (BufferedReader reader = openGffReader(gffPath)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#") || line.length() <= 1 || line.trim().isEmpty()) {
                    continue;
                }
                if (line.contains("transcription_start_site") || line.contains("transcription_end_site")) {
                    continue;
                }
                String sequenceId = line.split("\\s")[0];
                if (writer == null) {
                    currentFile = new File(sequenceId + ".tmp.gff");
                    writer = new PrintWriter(currentFile, "UTF-8");
                    splitFileNames.add(currentFile.getName());
                } else if (!sequenceId.equals(previousId) && currentFile.length() > SPLIT_THRESHOLD_BYTES) {
                    // Sequence IDs are compared literally; they are data, not regular expressions.
                    currentFile = new File(sequenceId + ".tmp.gff");
                    System.out.println("Starting new file: " + currentFile);
                    writer.close();
                    writer = new PrintWriter(currentFile, "UTF-8");
                    splitFileNames.add(currentFile.getName());
                }
                writer.println(line);
                previousId = sequenceId;
            }
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
        if (splitFileNames.isEmpty()) {
            throw new Exception("No feature lines found in GFF input " + gffPath);
        }
        return splitFileNames;
    }

    /**
     * Reads one split GFF3 file, maps it to EMBL entries and writes them to
     * {@code <output>.<splitFileName>.embl}.
     *
     * @param splitFileName name of a file produced by {@link #splitGffBySequence(String)}
     * @param totalFiles number of split files, used for the progress message only
     * @return the EMBL file that was written
     * @throws Exception if reading, mapping or writing fails, or the target file already exists
     */
    private File writeIntermediateEmbl(String splitFileName, int totalFiles) throws Exception {
        StringWriter emblText = new StringWriter();
        System.err.print("Parsing file: " + splitFileName + " of in total: " + totalFiles + "\r");

        List<Entry> entries;
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(splitFileName), "UTF-8"))) {
            GFF3FlatFileEntryReader gffReader = new GFF3FlatFileEntryReader(reader);
            gffReader.skip();
            GFF3RecordSet recordSet = gffReader.getEntry();
            logger.info("Parsing GFF " + recordSet.getRecords().size() + " records");
            entries = mapGFF3ToEntry(recordSet);
        }

        if (this.missed.size() > 0) {
            logger.warn("The following properties are not captured by the ontology: " + StringUtils.join(this.missed, " "));
        }

        File emblFile = new File(arguments.output + "." + splitFileName + ".embl");
        if (!arguments.debug && !arguments.gff2embl) {
            emblFile.deleteOnExit();
        }
        logger.info("Writing to intermediate EMBL file... " + emblFile.getAbsolutePath());
        for (Entry emblEntry : entries) {
            new EmblEntryWriter(emblEntry).write(emblText);
        }
        if (emblFile.exists()) {
            throw new Exception("File should not exists, bug in code detected " + emblFile);
        }
        Files.write(Paths.get(emblFile.getAbsolutePath()), emblText.toString().getBytes());
        emblText.close();
        return emblFile;
    }

    /** Invokes {@link App#main(String[])} in {@code -embl2rdf} mode, writing {@code <emblFile>.ttl}. */
    private void convertEmblToRdf(File emblFile) throws Exception {
        logger.info("Converting " + emblFile);
        String[] conversionArgs = {
            "-embl2rdf",
            "-input", emblFile.getAbsolutePath(),
            "-identifier", arguments.identifier,
            "-output", emblFile + ".ttl",
            "-codon", String.valueOf(arguments.codon)
        };
        System.err.println(StringUtils.join(conversionArgs, " "));
        App.main(conversionArgs);
    }

    /**
     * Loads every {@code <emblFile>.ttl} into a domain backed by {@code mergeDirectory}, registers
     * the namespace prefixes and saves the result to {@code arguments.output}.
     */
    private void mergeAndSave(Set<File> emblFiles, File mergeDirectory) throws Exception {
        mergeDirectory.mkdirs();
        Domain merged = new Domain("file://" + mergeDirectory);
        for (File emblFile : emblFiles) {
            logger.info("Parsing " + emblFile + ".ttl with " + merged.getRDFSimpleCon().getModel().size() + " triples in the database");
            RDFDataMgr.read(merged.getRDFSimpleCon().getModel(), emblFile + ".ttl");
        }
        merged.getRDFSimpleCon().setNsPrefix("gbol", "http://gbol.life/0.1/");
        merged.getRDFSimpleCon().setNsPrefix("terms", DCTerms.NS);
        merged.getRDFSimpleCon().setNsPrefix("prov", "http://www.w3.org/ns/prov#");
        merged.getRDFSimpleCon().setNsPrefix("void", VOID.NS);
        merged.getRDFSimpleCon().setNsPrefix("foaf", FOAF.NS);
        Output.save(merged, arguments.output);
    }

    /**
     * Deletes {@code file} if it exists. A failed delete is logged instead of being retried in a
     * loop, so an undeletable file can no longer hang the process.
     */
    private static void deleteIfPresent(File file) {
        if (file.exists() && !file.delete()) {
            logger.warn("Could not delete intermediate file " + file);
        }
    }

    /**
     * Collects the names of all declared methods containing {@code "handle"} from the classes
     * listed in {@link #gbolClasses}.
     *
     * @throws ClassNotFoundException if one of the listed classes is not on the class path
     */
    private void getHandles() throws SecurityException, ClassNotFoundException {
        for (String className : this.gbolClasses) {
            Method[] declared = Class.forName(className).getDeclaredMethods();
            for (Method method : declared) {
                String methodName = method.getName();
                if (methodName.contains("handle")) {
                    this.allowed.add(methodName);
                }
            }
        }
    }

    /**
     * Groups the records of a GFF3 record set by (lower-cased) ID, creates one EMBL feature per
     * group on the entry of its sequence, adds exon/CDS child locations to transcript-like
     * parents, and finally assigns locations and qualifiers to every feature.
     *
     * <p>Records without an ID attribute are grouped under {@code <parent>___<type>}.
     *
     * @param gFF3RecordSet the parsed GFF3 records
     * @return one entry per distinct sequence ID, in no particular order
     * @throws Exception if a sequence cannot be retrieved or a feature cannot be built
     */
    public List<Entry> mapGFF3ToEntry(GFF3RecordSet gFF3RecordSet) throws Exception {
        List<GFF3RecordElement> elements = new ArrayList<>();
        // Index by ID so that merging records and resolving parents is a hash lookup, not a scan.
        Map<String, GFF3RecordElement> elementsById = new HashMap<>();
        Map<String, Entry> entriesBySequence = new HashMap<>();

        for (GFF3Record record : gFF3RecordSet.getRecords()) {
            String id = null;
            String parent = null;
            for (Map.Entry<String, String> attribute : record.getAttributes().entrySet()) {
                String key = attribute.getKey().toLowerCase();
                String value = attribute.getValue().toLowerCase();
                if (key.equals("parent")) {
                    parent = value;
                }
                // NOTE(review): TriX.tagId is presumably the literal "id" inlined by the
                // decompiler - confirm and replace with a local constant.
                if (key.equals(TriX.tagId)) {
                    id = value;
                }
            }
            if (id == null) {
                id = parent + "___" + record.getType();
            }

            GFF3RecordElement known = elementsById.get(id);
            if (known != null) {
                known.addGFFRecord(record);
                addExonOrCds(known, record);
                continue;
            }

            GFF3RecordElement element = new GFF3RecordElement();
            element.setId(id);
            element.setParent(parent);
            addExonOrCds(element, record);
            element.addGFFRecord(record);
            element.setSequenceID(record.getSequenceID());
            element.setFeature(this.featureFactory.createFeature(featureNameFor(record.getType())));
            elements.add(element);
            elementsById.put(id, element);

            Entry sequenceEntry = entryForSequence(element.getSequenceID(), entriesBySequence);
            sequenceEntry.addFeature(element.getFeature());
        }
        logger.info("Creation of combined records completed...");

        logger.info("Parental positioning...");
        attachChildLocations(elements, elementsById);

        logger.info("Finalizing locations and qualifiers...");
        for (GFF3RecordElement element : elements) {
            if (element.getFeature().getLocations() == null
                    || element.getFeature().getLocations().getMinPosition() == null) {
                Join<Location> ownLocations = new Join<Location>();
                for (GFF3Record record : element.getRecords()) {
                    ownLocations.addLocation(createLocation(record));
                }
                element.getFeature().setLocations(ownLocations);
            }
            List<Qualifier> qualifiers = new ArrayList<>();
            for (GFF3Record record : element.getRecords()) {
                qualifiers.addAll(setQualifiers(record));
            }
            element.getFeature().addQualifiers(qualifiers);
        }
        return new ArrayList<Entry>(entriesBySequence.values());
    }

    /** Registers {@code record} as exon or CDS on {@code element} when its type says so. */
    private void addExonOrCds(GFF3RecordElement element, GFF3Record record) {
        String type = record.getType().toLowerCase();
        if (type.equals(Feature.EXON_FEATURE_NAME)) {
            element.addExon(record);
        } else if (type.equals("cds")) {
            element.addCds(record);
        }
    }

    /**
     * Translates a GFF3 type into the feature name passed to the feature factory. The checks
     * are order-sensitive; unrecognised types are passed through unchanged (original case).
     */
    private static String featureNameFor(String gffType) {
        String type = gffType.toLowerCase();
        if (type.matches("(region|chromosome)")) {
            return "source";
        }
        if (type.endsWith("gene")) {
            return "gene";
        }
        if (type.endsWith("trna")) {
            return "trna";
        }
        if (type.endsWith("ncrna")) {
            return "ncrna";
        }
        if (type.endsWith("rrna")) {
            return "rrna";
        }
        if (type.endsWith("mrna")) {
            return "mrna";
        }
        if (type.matches("(transcript|.*rna)")) {
            return "mrna";
        }
        return gffType;
    }

    /**
     * Returns the entry for {@code sequenceId}, creating it (including its lower-cased sequence
     * fetched from {@link #fastaDomain}) on first use.
     */
    private Entry entryForSequence(String sequenceId, Map<String, Entry> entriesBySequence) throws Exception {
        Entry existing = entriesBySequence.get(sequenceId);
        if (existing != null) {
            return existing;
        }
        Entry created = this.entryFactory.createEntry();
        created.setPrimaryAccession(sequenceId);
        created.setId(sequenceId);
        logger.info("Parsing sequence id: " + sequenceId);
        created.setSequence(new SequenceFactory().createSequenceByte(this.fastaDomain.getRDFSimpleCon().runQuerySingleRes("getSequenceFromAccession.txt", true, sequenceId).getLitString(Tags.tagSequence).toLowerCase().getBytes()));
        created.getSequence().setAccession(sequenceId);
        logger.info("Entry added with sequence");
        entriesBySequence.put(sequenceId, created);
        return created;
    }

    /**
     * For every exon/CDS element whose parent is transcript-like (type matches
     * {@code transcript|.*rna} and does not contain {@code gene}), adds the child's exon
     * locations - or, when it has none, its CDS locations - to the parent feature.
     */
    private void attachChildLocations(List<GFF3RecordElement> elements,
            Map<String, GFF3RecordElement> elementsById) {
        int processed = 0;
        for (GFF3RecordElement child : elements) {
            processed++;
            System.out.print(processed + "\t" + elements.size() + "\r");
            if (child.getParent() == null) {
                continue;
            }
            GFF3RecordElement parent = elementsById.get(child.getParent());
            if (parent == null) {
                continue;
            }
            String parentType = parent.getRecords().get(0).getType().toLowerCase();
            if (parentType.contains("gene") || !parentType.matches("(transcript|.*rna)")) {
                continue;
            }
            String childType = child.getRecords().get(0).getType().toLowerCase();
            if (!childType.equals("cds") && !childType.equals(Feature.EXON_FEATURE_NAME)) {
                continue;
            }

            GFF3RecordSet childRecords = null;
            if (child.getExons() != null && child.getExons().getRecords().size() > 0) {
                childRecords = child.getExons();
            } else if (child.getCds() != null && child.getCds().getRecords().size() > 0) {
                childRecords = child.getCds();
            }
            if (childRecords == null) {
                // Nothing to contribute; never fall back to an unrelated, stale record set.
                continue;
            }

            if (parent.getFeature().getLocations() != null) {
                for (GFF3Record record : childRecords.getRecords()) {
                    parent.getFeature().getLocations().addLocation(createLocation(record));
                }
            } else {
                Join<Location> joined = new Join<Location>();
                for (GFF3Record record : childRecords.getRecords()) {
                    joined.addLocation(createLocation(record));
                }
                parent.getFeature().setLocations(joined);
            }
        }
    }

    /** Builds a local range from the record's start/end, complemented for strand {@code -1}. */
    private Location createLocation(GFF3Record gFF3Record) {
        LocalRange range = this.locationFactory.createLocalRange(
                Long.valueOf(gFF3Record.getStart()), Long.valueOf(gFF3Record.getEnd()));
        range.setComplement(setStrand(gFF3Record.getStrand()));
        return range;
    }

    /** Returns {@code true} when the strand value is {@code -1}. */
    private boolean setStrand(int i) {
        return i == -1;
    }

    /**
     * Converts the attributes of a GFF3 record into qualifiers. An attribute is kept when a
     * {@code handle<Name>} method is known for its normalised name (underscores removed, words
     * capitalised); {@code dbxref} attributes become {@code db_xref} qualifiers; every other
     * attribute name is remembered in {@link #missed}. Values are lower-cased, and for
     * {@code Id} only the part between the first and second colon is kept.
     *
     * @return the qualifiers created for this record, possibly empty
     */
    private Collection<Qualifier> setQualifiers(GFF3Record gFF3Record) {
        List<Qualifier> qualifiers = new ArrayList<>();
        for (Map.Entry<String, String> attribute : gFF3Record.getAttributes().entrySet()) {
            String key = attribute.getKey();
            String value = attribute.getValue().toLowerCase();
            String handleName = WordUtils.capitalizeFully(key.replaceAll("_", " ")).replaceAll(" +", "");
            if (handleName.equals("Id") && value.contains(":")) {
                String[] parts = value.split(":");
                // "gene:" splits into a single element; keep the value as-is in that case.
                if (parts.length > 1) {
                    value = parts[1];
                }
            }
            if (this.allowed.contains("handle" + handleName)) {
                qualifiers.add(this.qualifierFactory.createQualifier(key, value));
            } else if (key.toLowerCase().equals("dbxref")) {
                // The qualifier used to be created and then dropped; it has to be returned.
                qualifiers.add(this.qualifierFactory.createQualifier(Qualifier.DB_XREF_QUALIFIER_NAME, value));
            } else {
                this.missed.add(handleName);
            }
        }
        return qualifiers;
    }
}
