package info.bioinfweb.jphyloio.formats.mega;

import info.bioinfweb.commons.bio.CharacterStateSetType;
import info.bioinfweb.commons.bio.CharacterSymbolMeaning;
import info.bioinfweb.commons.io.StreamLocationProvider;
import info.bioinfweb.commons.text.StringUtils;
import info.bioinfweb.jphyloio.ReadWriteConstants;
import info.bioinfweb.jphyloio.ReadWriteParameterMap;
import info.bioinfweb.jphyloio.events.CharacterSetIntervalEvent;
import info.bioinfweb.jphyloio.events.ConcreteJPhyloIOEvent;
import info.bioinfweb.jphyloio.events.JPhyloIOEvent;
import info.bioinfweb.jphyloio.events.LabeledIDEvent;
import info.bioinfweb.jphyloio.events.LinkedLabeledIDEvent;
import info.bioinfweb.jphyloio.events.PartEndEvent;
import info.bioinfweb.jphyloio.events.SequenceTokensEvent;
import info.bioinfweb.jphyloio.events.SingleTokenDefinitionEvent;
import info.bioinfweb.jphyloio.events.TokenSetDefinitionEvent;
import info.bioinfweb.jphyloio.events.meta.LiteralContentSequenceType;
import info.bioinfweb.jphyloio.events.meta.LiteralMetadataContentEvent;
import info.bioinfweb.jphyloio.events.meta.LiteralMetadataEvent;
import info.bioinfweb.jphyloio.events.meta.URIOrStringIdentifier;
import info.bioinfweb.jphyloio.events.type.EventContentType;
import info.bioinfweb.jphyloio.events.type.EventTopologyType;
import info.bioinfweb.jphyloio.exception.JPhyloIOReaderException;
import info.bioinfweb.jphyloio.formats.JPhyloIOFormatIDs;
import info.bioinfweb.jphyloio.formats.text.AbstractTextEventReader;
import info.bioinfweb.jphyloio.formats.text.KeyValueInformation;
import info.bioinfweb.jphyloio.formats.text.TextReaderStreamDataProvider;
import info.bioinfweb.jphyloio.utils.IDToNameManager;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.QName;

/**
 * Event reader for alignments stored in the MEGA text format.
 * <p>
 * After verifying the MEGA file header, the reader emits a document start event, a single alignment
 * start event and then translates the file contents step by step:
 * <ul>
 *   <li>{@code FORMAT} commands become literal metadata events and, if a data type was declared,
 *       a token set definition with the collected single token definitions,</li>
 *   <li>{@code LABEL} commands and {@code GENE}/{@code DOMAIN} commands become character set events,</li>
 *   <li>all other commands become literal metadata events,</li>
 *   <li>sequence names (introduced by {@code '#'}) and the following characters become sequence and
 *       sequence token events.</li>
 * </ul>
 * All keyword comparisons are case insensitive and performed independently of the default locale.
 */
public class MEGAEventReader extends AbstractTextEventReader<TextReaderStreamDataProvider<MEGAEventReader>> implements MEGAConstants, ReadWriteConstants {
    /** Common tail of the patterns recognizing {@code GENE = name} and {@code DOMAIN = name} commands. */
    private static final String GENE_DOMAIN_COMMAND_PATTERN_SUFFIX = "\\s*\\=\\s*(\\w+).*";

    /** Matches command text up to (and including) the next comment start or command end character. */
    private static final Pattern READ_COMMAND_PATTERN = Pattern.compile(".+(\\[|\\;)", Pattern.DOTALL);
    /** Matches a sequence name together with the whitespace that terminates it. */
    private static final Pattern SEQUENCE_NAME_PATTERN = Pattern.compile(".+\\s+");
    private static final Pattern GENE_COMMAND_PATTERN =
            Pattern.compile(".*GENE" + GENE_DOMAIN_COMMAND_PATTERN_SUFFIX, Pattern.CASE_INSENSITIVE);
    private static final Pattern DOMAIN_COMMAND_PATTERN =
            Pattern.compile(".*DOMAIN" + GENE_DOMAIN_COMMAND_PATTERN_SUFFIX, Pattern.CASE_INSENSITIVE);

    /** Set from the data format subcommand of {@code FORMAT}; controls the flag of sequence end events. */
    private boolean isInterleaved = false;
    /** Name of the sequence currently being read or {@code null} if no sequence is open. */
    private String currentSequenceName = null;
    /** Name of the first sequence encountered in the file; its tokens are used to count columns. */
    private String firstSequenceName = null;
    /** Number of tokens read so far for the first sequence. */
    private long charactersRead = 0L;
    /** Column position at which the next {@code LABEL} command starts. */
    private long currentLabelPos = 0L;
    /** Full text of the currently open {@code GENE}/{@code DOMAIN} command or {@code null}. */
    private String currentGeneOrDomainName = null;
    /** Value of {@link #charactersRead} when the currently open gene or domain started. */
    private long currentGeneOrDomainStart = -1L;
    private final IDToNameManager sequenceIDToNameManager = new IDToNameManager("seq");
    /** Token set start event created from the {@code DATATYPE} subcommand or {@code null}. */
    private TokenSetDefinitionEvent tokenSetRootEvent = null;
    /** Token definitions collected from the {@code MISSING} and indel subcommands. */
    private final List<SingleTokenDefinitionEvent> singleTokenDefinitions = new ArrayList<SingleTokenDefinitionEvent>(2);

    /**
     * Creates a reader on top of a buffered reader.
     *
     * @param bufferedReader the source of the MEGA document
     * @param readWriteParameterMap the reader parameters; its match token is forwarded to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public MEGAEventReader(BufferedReader bufferedReader, ReadWriteParameterMap readWriteParameterMap) throws IOException {
        super(bufferedReader, readWriteParameterMap, readWriteParameterMap.getMatchToken());
    }

    /**
     * Creates a reader for a file.
     *
     * @param file the MEGA file to read
     * @param readWriteParameterMap the reader parameters; its match token is forwarded to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public MEGAEventReader(File file, ReadWriteParameterMap readWriteParameterMap) throws IOException {
        super(file, readWriteParameterMap, readWriteParameterMap.getMatchToken());
    }

    /**
     * Creates a reader on top of an input stream.
     *
     * @param inputStream the source of the MEGA document
     * @param readWriteParameterMap the reader parameters; its match token is forwarded to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public MEGAEventReader(InputStream inputStream, ReadWriteParameterMap readWriteParameterMap) throws IOException {
        super(inputStream, readWriteParameterMap, readWriteParameterMap.getMatchToken());
    }

    /**
     * Creates a reader on top of an arbitrary reader.
     *
     * @param reader the source of the MEGA document
     * @param readWriteParameterMap the reader parameters; its match token is forwarded to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public MEGAEventReader(Reader reader, ReadWriteParameterMap readWriteParameterMap) throws IOException {
        super(reader, readWriteParameterMap, readWriteParameterMap.getMatchToken());
    }

    @Override
    public String getFormatID() {
        return JPhyloIOFormatIDs.MEGA_FORMAT_ID;
    }

    /** Returns {@code prefix} followed by a new ID obtained from the ID manager of the stream data provider. */
    private String createID(String prefix) {
        return prefix + ((TextReaderStreamDataProvider<?>) getStreamDataProvider()).getIDManager().createNewID();
    }

    /** Adds the end event that closes a literal metadata element. */
    private void addLiteralMetaEnd() {
        getCurrentEventCollection().add(ConcreteJPhyloIOEvent.createEndEvent(EventContentType.LITERAL_META));
    }

    /**
     * Consumes the file header and verifies it (ignoring case).
     *
     * @throws JPhyloIOReaderException if the stream does not start with the MEGA header
     */
    private void checkStart() throws IOException {
        String header = getReader().readString(MEGAConstants.FIRST_LINE.length());
        if (!MEGAConstants.FIRST_LINE.equals(header.toUpperCase(Locale.ROOT))) {
            throw new JPhyloIOReaderException("All MEGA files must start with \"#MEGA\".", 0L, 0L, 0L);
        }
    }

    /** Applies the match token declared by the {@code IDENTICAL} subcommand; other keys are ignored here. */
    private void processFormatSubcommand(String str, String str2) {
        if (MEGAConstants.FORMAT_SUBCOMMAND_IDENTICAL.equals(str.toUpperCase(Locale.ROOT))) {
            getSequenceTokensEventManager().setMatchToken(str2);
        }
    }

    /**
     * Maps an upper case MEGA data type name to a token set type.
     *
     * @param str the upper case data type name
     * @return the matching type or {@link CharacterStateSetType#UNKNOWN} for unrecognized names
     */
    private CharacterStateSetType getTokenSetType(String str) {
        if (str.equals("NUCLEOTIDE")) {
            return CharacterStateSetType.NUCLEOTIDE;
        }
        if (str.equals("DNA")) {
            return CharacterStateSetType.DNA;
        }
        if (str.equals("RNA")) {
            return CharacterStateSetType.RNA;
        }
        if (str.equals("PROTEIN")) {
            return CharacterStateSetType.AMINO_ACID;
        }
        return CharacterStateSetType.UNKNOWN;
    }

    /**
     * Emits the three literal metadata events describing a numeric count subcommand.
     *
     * @param info the key/value pair read from the {@code FORMAT} command
     * @param predicate the predicate to attach to the metadata start event
     * @param description text naming the count in the error message
     * @throws JPhyloIOReaderException if the value cannot be parsed as a {@code long}
     */
    private void addCountMetadata(KeyValueInformation info, URIOrStringIdentifier predicate, String description) throws IOException {
        Long count;
        try {
            count = Long.valueOf(info.getValue());
        } catch (NumberFormatException e) {
            throw new JPhyloIOReaderException("The " + description + " found in the document (\"" + info.getValue()
                    + "\") is not a valid integer.", (StreamLocationProvider) getReader(), (Throwable) e);
        }
        getCurrentEventCollection().add(new LiteralMetadataEvent(createID("meta"), info.getOriginalKey(), predicate, LiteralContentSequenceType.SIMPLE));
        getCurrentEventCollection().add(new LiteralMetadataContentEvent(count, info.getValue()));
        addLiteralMetaEnd();
    }

    /** Remembers a single token definition; it is only emitted if a token set is declared by {@code DATATYPE}. */
    private void addSingleTokenDefinition(String token, CharacterSymbolMeaning meaning) {
        this.singleTokenDefinitions.add(new SingleTokenDefinitionEvent(
                createID(ReadWriteConstants.DEFAULT_TOKEN_DEFINITION_ID_PREFIX), null, token, meaning, null));
    }

    /**
     * Reads a {@code FORMAT} command (the reader is positioned at the command name) up to and including
     * its terminating {@code ';'} and emits the resulting metadata and token set events.
     *
     * @throws JPhyloIOReaderException if a count is not numeric, if the data type is declared twice or
     *         if the file ends inside the command
     */
    private void readFormatCommand() throws IOException {
        try {
            getReader().skip("FORMAT".length());
            // Comments are delimited by '[' and ']' like everywhere else in this reader.
            consumeWhiteSpaceAndComments('[', ']');
            while (getReader().peekChar() != ';') {
                KeyValueInformation info = readKeyValueInformation(';', '[', ']', '=');
                String key = info.getOriginalKey();
                String value = info.getValue();
                processFormatSubcommand(key, value);
                String upperKey = key.toUpperCase(Locale.ROOT);

                if (MEGAConstants.FORMAT_SUBCOMMAND_NSITES.equals(upperKey)) {
                    addCountMetadata(info, new URIOrStringIdentifier(key, PREDICATE_CHARACTER_COUNT), "column count (NSITES)");
                } else if (MEGAConstants.FORMAT_SUBCOMMAND_NTAXA.equals(upperKey)) {
                    addCountMetadata(info, new URIOrStringIdentifier(key, PREDICATE_SEQUENCE_COUNT), "sequence count (NTAXA)");
                } else if ("DATATYPE".equals(upperKey)) {
                    if (this.tokenSetRootEvent != null) {
                        throw new JPhyloIOReaderException("Duplicate token set definition in MEGA FORMAT command.",
                                (StreamLocationProvider) ((TextReaderStreamDataProvider<?>) getStreamDataProvider()).getDataReader());
                    }
                    this.tokenSetRootEvent = new TokenSetDefinitionEvent(getTokenSetType(value.toUpperCase(Locale.ROOT)),
                            createID(ReadWriteConstants.DEFAULT_TOKEN_SET_ID_PREFIX), null);
                } else if ("MISSING".equals(upperKey)) {
                    addSingleTokenDefinition(value, CharacterSymbolMeaning.MISSING);
                } else if (MEGAConstants.FORMAT_SUBCOMMAND_INDEL.equals(upperKey)) {
                    addSingleTokenDefinition(value, CharacterSymbolMeaning.GAP);
                } else {
                    // The data format subcommand is evaluated and additionally reported as generic metadata.
                    if (MEGAConstants.FORMAT_SUBCOMMAND_DATA_FORMAT.equals(upperKey)) {
                        this.isInterleaved = MEGAConstants.FORMAT_VALUE_INTERLEAVED_DATA_FORMAT.equals(value.toUpperCase(Locale.ROOT));
                    }
                    URIOrStringIdentifier predicate = new URIOrStringIdentifier(key,
                            new QName(MEGAConstants.MEGA_PREDICATE_NAMESPACE, "FORMAT." + upperKey));
                    getCurrentEventCollection().add(new LiteralMetadataEvent(createID("meta"), key, predicate, LiteralContentSequenceType.SIMPLE));
                    getCurrentEventCollection().add(new LiteralMetadataContentEvent(value, false));
                    addLiteralMetaEnd();
                }
            }

            if (this.tokenSetRootEvent != null) {
                getCurrentEventCollection().add(this.tokenSetRootEvent);
                Iterator<SingleTokenDefinitionEvent> definitions = this.singleTokenDefinitions.iterator();
                while (definitions.hasNext()) {
                    getCurrentEventCollection().add(definitions.next());
                    getCurrentEventCollection().add(ConcreteJPhyloIOEvent.createEndEvent(EventContentType.SINGLE_TOKEN_DEFINITION));
                }
                getCurrentEventCollection().add(ConcreteJPhyloIOEvent.createEndEvent(EventContentType.TOKEN_SET_DEFINITION));
            }
            getReader().skip(1L);  // Consume the terminating ';'.
        } catch (EOFException e) {
            throw new JPhyloIOReaderException("Unexpected end of file in FORMAT command.", (StreamLocationProvider) getReader(), (Throwable) e);
        }
    }

    /** Creates a predicate in the MEGA namespace whose local part is the upper case command name. */
    private URIOrStringIdentifier createCommandPredicate(String str) {
        return new URIOrStringIdentifier(str, new QName(MEGAConstants.MEGA_PREDICATE_NAMESPACE, str.toUpperCase(Locale.ROOT)));
    }

    /**
     * Emits literal metadata events for a generic command.
     * <p>
     * If the text contains whitespace after its first character, the part before the first whitespace is
     * used as the command name (label and predicate) and the trimmed remainder as the content. Otherwise
     * the whole text is the content of a metadata element without label.
     *
     * @param str the trimmed command text without the terminating {@code ';'}
     */
    private void addMetaEventsFromCommand(String str) throws IOException {
        int nameEnd = StringUtils.indexOfWhiteSpace(str);
        String content = str;
        if (nameEnd < 1) {
            getCurrentEventCollection().add(new LiteralMetadataEvent(createID("meta"), null,
                    new URIOrStringIdentifier(null, PREDICATE_HAS_LITERAL_METADATA), LiteralContentSequenceType.SIMPLE));
        } else {
            String commandName = str.substring(0, nameEnd);
            content = str.substring(nameEnd).trim();
            getCurrentEventCollection().add(new LiteralMetadataEvent(createID("meta"), commandName,
                    createCommandPredicate(commandName), LiteralContentSequenceType.SIMPLE));
        }
        getCurrentEventCollection().add(new LiteralMetadataContentEvent(content, false));
        addLiteralMetaEnd();
    }

    /**
     * Reads a {@code LABEL} command (the reader is positioned at the command name) and emits one character
     * set containing an interval for every run of identical label characters other than {@code '_'}.
     * <p>
     * Whitespace and comments inside the command are skipped and do not advance the column position.
     * Each interval is created from the position where the run started and the position directly after
     * its last character. The position reached at the end is stored as the start of the next label.
     */
    private void createCharacterSetEventsFromLabel() throws IOException {
        getReader().skip(MEGAConstants.COMMAND_NAME_LABEL.length());
        long position = this.currentLabelPos;
        long runStart = -1;
        char runChar = '_';
        char current = getReader().readChar();
        getCurrentEventCollection().add(new LabeledIDEvent(EventContentType.CHARACTER_SET, MEGAConstants.LABEL_CHAR_SET_ID, Character.toString('_')));
        while (current != ';') {
            if (!Character.isWhitespace(current)) {
                if (current == '[') {
                    readComment('[', ']');
                } else {
                    if (current != runChar) {  // A new run starts here.
                        if (runChar != '_') {
                            getCurrentEventCollection().add(new CharacterSetIntervalEvent(runStart, position));
                        }
                        runStart = position;
                        runChar = current;
                    }
                    position++;
                }
            }
            current = getReader().readChar();
        }
        if (runChar != '_') {  // Close the run that was still open at the end of the command.
            getCurrentEventCollection().add(new CharacterSetIntervalEvent(runStart, position));
        }
        this.currentLabelPos = position;
        getCurrentEventCollection().add(new PartEndEvent(EventContentType.CHARACTER_SET, false));
    }

    /**
     * Builds a character set ID from a gene or domain command.
     *
     * @param str the command text
     * @return {@code "GENE."} or {@code "DOMAIN."} followed by the declared name, or {@code null} if the
     *         text matches neither pattern
     */
    private static String extractGeneOrDomainID(String str) {
        Matcher geneMatcher = GENE_COMMAND_PATTERN.matcher(str);
        if (geneMatcher.matches()) {
            return "GENE." + geneMatcher.group(1);
        }
        Matcher domainMatcher = DOMAIN_COMMAND_PATTERN.matcher(str);
        if (domainMatcher.matches()) {
            return "DOMAIN." + domainMatcher.group(1);
        }
        return null;
    }

    /** Emits a character set for the currently open gene or domain, ending at the current column count. */
    private void createGeneOrDomainCharSetEvents() {
        getCurrentEventCollection().add(new LabeledIDEvent(EventContentType.CHARACTER_SET,
                extractGeneOrDomainID(this.currentGeneOrDomainName), this.currentGeneOrDomainName));
        getCurrentEventCollection().add(new CharacterSetIntervalEvent(this.currentGeneOrDomainStart, this.charactersRead));
        getCurrentEventCollection().add(new PartEndEvent(EventContentType.CHARACTER_SET, false));
    }

    /**
     * Reads the next command if the reader is positioned at a command start ({@code '!'}); does nothing
     * otherwise. A currently open sequence is ended before the command is processed.
     * <p>
     * {@code LABEL} and {@code FORMAT} commands are delegated to their specialized methods. Gene or domain
     * commands (unless the command starts with the title or description keyword) close the previously
     * open gene or domain and open a new one. Every other command is reported as literal metadata.
     *
     * @throws JPhyloIOReaderException if an empty result is returned while reading the command text
     */
    private void readCommand() throws IOException {
        int next = getReader().peek();
        if (next == -1 || ((char) next) != '!') {
            return;
        }
        endSequence();
        getReader().read();  // Consume '!'.
        consumeWhiteSpaceAndComments('[', ']');

        if (getReader().peekString(MEGAConstants.COMMAND_NAME_LABEL.length()).toUpperCase(Locale.ROOT).equals(MEGAConstants.COMMAND_NAME_LABEL)) {
            createCharacterSetEventsFromLabel();
            return;
        }
        if (getReader().peekString("FORMAT".length()).toUpperCase(Locale.ROOT).equals("FORMAT")) {
            readFormatCommand();
            return;
        }

        // Collect the command text piece by piece, skipping embedded comments.
        StringBuilder text = new StringBuilder();
        char terminator;
        do {
            CharSequence part = getReader().readRegExp(READ_COMMAND_PATTERN, false).getSequence();
            if (part.length() == 0) {
                // Avoids a StringIndexOutOfBoundsException below if nothing could be read.
                throw new JPhyloIOReaderException("A MEGA command was not terminated by ';'.", (StreamLocationProvider) getReader());
            }
            terminator = part.charAt(part.length() - 1);
            if (terminator == '[') {
                readComment('[', ']');
            }
            text.append(part.subSequence(0, part.length() - 1));
        } while (terminator != ';');

        String command = text.toString().trim();
        String upperCommand = command.toUpperCase(Locale.ROOT);
        boolean isGeneOrDomain = DOMAIN_COMMAND_PATTERN.matcher(command).matches() || GENE_COMMAND_PATTERN.matcher(command).matches();
        if (upperCommand.startsWith("TITLE") || upperCommand.startsWith(MEGAConstants.COMMAND_NAME_DESCRIPTION) || !isGeneOrDomain) {
            addMetaEventsFromCommand(command);
            return;
        }

        if (this.currentGeneOrDomainName != null) {
            createGeneOrDomainCharSetEvents();
        }
        this.currentGeneOrDomainName = command;
        this.currentGeneOrDomainStart = this.charactersRead;
    }

    /**
     * Consumes the sequence start character and the following name and emits a sequence start event.
     * <p>
     * When the first sequence is encountered again, the label position is moved forward to the number of
     * columns read so far (if that is greater than the current label position).
     */
    private void readSequenceName() throws IOException {
        getReader().read();  // Consume the character introducing the name.
        this.currentSequenceName = getReader().readRegExp(SEQUENCE_NAME_PATTERN, false).getSequence().toString().trim();
        if (this.firstSequenceName == null) {
            this.firstSequenceName = this.currentSequenceName;
        } else if (this.firstSequenceName.equals(this.currentSequenceName)) {
            this.currentLabelPos = Math.max(this.currentLabelPos, this.charactersRead);
        }
        getCurrentEventCollection().add(new LinkedLabeledIDEvent(EventContentType.SEQUENCE,
                this.sequenceIDToNameManager.getID(this.currentSequenceName), this.currentSequenceName, null));
    }

    /**
     * Adds the number of tokens of a sequence tokens event to the column count if the event belongs to
     * the first sequence. Events of other types and tokens read while no sequence is open are ignored.
     */
    private void countCharacters(JPhyloIOEvent jPhyloIOEvent) {
        if (!jPhyloIOEvent.getType().getContentType().equals(EventContentType.SEQUENCE_TOKENS)) {
            return;
        }
        SequenceTokensEvent tokensEvent = jPhyloIOEvent.asSequenceTokensEvent();
        // Null-safe comparison: no sequence may be open if tokens precede the first sequence name.
        if (this.firstSequenceName != null && this.firstSequenceName.equals(this.currentSequenceName)) {
            this.charactersRead += tokensEvent.getTokens().size();
        }
    }

    /** Emits the end event of the currently open sequence, if there is one, and marks it as closed. */
    private void endSequence() {
        if (this.currentSequenceName != null) {
            getCurrentEventCollection().add(new PartEndEvent(EventContentType.SEQUENCE, !this.isInterleaved));
            this.currentSequenceName = null;
        }
    }

    /**
     * Produces the next event(s) depending on the type of the last non-comment event.
     * <p>
     * On first access the header is checked and the document start is emitted. The document start is
     * followed by the alignment start and the alignment end by the document end. In all other states the
     * next command, sequence name or block of sequence characters is read. At the end of the file open
     * sequences and genes/domains are closed before the alignment end is emitted.
     */
    @Override
    protected void readNextEvent() throws IOException {
        if (isBeforeFirstAccess()) {
            checkStart();
            consumeWhiteSpaceAndComments('[', ']');
            getCurrentEventCollection().add(new ConcreteJPhyloIOEvent(EventContentType.DOCUMENT, EventTopologyType.START));
            return;
        }

        EventContentType lastContentType = getLastNonCommentEvent().getType().getContentType();
        EventTopologyType lastTopologyType = getLastNonCommentEvent().getType().getTopologyType();
        switch (lastContentType) {
            case DOCUMENT:
                if (lastTopologyType.equals(EventTopologyType.START)) {
                    getCurrentEventCollection().add(new LinkedLabeledIDEvent(EventContentType.ALIGNMENT,
                            ReadWriteConstants.DEFAULT_MATRIX_ID_PREFIX + getIDManager().createNewID(), null, null));
                }
                return;
            case ALIGNMENT:
                if (lastTopologyType.equals(EventTopologyType.END)) {
                    getCurrentEventCollection().add(new ConcreteJPhyloIOEvent(EventContentType.DOCUMENT, EventTopologyType.END));
                    return;
                }
                break;
            case TOKEN_SET_DEFINITION:
            case SINGLE_TOKEN_DEFINITION:
            case SEQUENCE:
            case SEQUENCE_TOKENS:
            case CHARACTER_SET:
            case LITERAL_META:
            case COMMENT:
                break;
            default:
                throw new InternalError("Unexpected event type " + getLastNonCommentEvent().getType());
        }

        readCommand();
        consumeWhiteSpaceAndComments('[', ']');
        if (!getCurrentEventCollection().isEmpty()) {
            return;  // The command (or a comment) already produced events.
        }

        int next = getReader().peek();
        if (next == -1) {  // End of file
            endSequence();
            if (this.currentGeneOrDomainName == null) {
                getCurrentEventCollection().add(new ConcreteJPhyloIOEvent(EventContentType.ALIGNMENT, EventTopologyType.END));
            } else {
                // The alignment end follows in a later call, after the open gene or domain was closed.
                createGeneOrDomainCharSetEvents();
                this.currentGeneOrDomainName = null;
            }
            return;
        }

        if (next == '#') {  // A new sequence name follows.
            endSequence();
            readSequenceName();
            consumeWhiteSpaceAndComments('[', ']');
        }

        JPhyloIOEvent tokensEvent = readCharacters(this.currentSequenceName, '[', ']');
        if (tokensEvent == null) {
            readNextEvent();
        } else {
            consumeWhiteSpaceAndComments('[', ']');
            countCharacters(tokensEvent);
        }
    }
}
