package io.flatfiles;

import annotations.enums.Species;
import annotations.motifs.MotifUtilities;
import annotations.motifs.ScorableSeq;
import annotations.motifs.SeqMotifAnno;
import annotations.motifs.SequenceMotif;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import settings.StaticSettings;

/* loaded from: input_file:io/flatfiles/JasparParser.class */
public class JasparParser {
    // NOTE(review): all four locations below are hard-coded absolute Windows paths;
    // the converter only works on a machine that has this exact directory layout.

    // Tab-separated JASPAR matrix list; read line by line in getNameToJasparEntry().
    private static File jFileList = new File("C:\\Users\\Applecore\\Desktop\\JASPAR\\JASPAR_matrix_list.txt");
    // JASPAR count matrices (">" header followed by A/C/G/T rows); read in getNameToBaseCount().
    private static File jFileMatrix = new File("C:\\Users\\Applecore\\Desktop\\JASPAR\\JASPAR_matrix_only.txt");
    // Species list mapping taxon IDs to names; read in getSpeciesInfo().
    private static File spFile = new File("C:\\Users\\Applecore\\Desktop\\JASPAR\\UNIPROT_species.txt");
    // Directory prefix (with trailing separator) for every file written by convertFiles().
    private static String outFileDir = "C:\\Users\\Applecore\\Desktop\\JASPAR\\";

    /* loaded from: input_file:io/flatfiles/JasparParser$JasparEntry.class */
    /**
     * One line of the JASPAR matrix list file.
     *
     * <p>A line consists of exactly five tab-separated columns. The first column is
     * kept as the entry name; the fifth column is a comment string made of
     * {@code key "value"} items separated by {@code " ; "}. Items whose value is
     * empty or {@code "-"} are ignored.
     */
    public class JasparEntry {
        private final String name;
        private final String commentLine;
        private final Map<String, String> commentMap;

        /**
         * Parses a single line of the matrix list file.
         *
         * @param str the raw, tab-separated line
         * @throws Exception if the line does not have exactly five columns or a
         *                   non-blank comment item is not of the form {@code key "value"}
         */
        public JasparEntry(String str) throws Exception {
            String[] columns = str.split("\\t");
            if (columns.length != 5) {
                throw new Exception("Invalid line: " + str);
            }
            this.name = columns[0].trim();
            // Drop the leading ';' that introduces the comment column.
            this.commentLine = columns[4].trim().replaceAll("^\\s*;\\s*", "");
            this.commentMap = new HashMap<String, String>();
            Pattern keyValue = Pattern.compile("\\s*(\\w+)\\s+\"(.*?)\"\\s*");
            for (String item : this.commentLine.split(" ; ")) {
                if (item.trim().isEmpty()) {
                    continue;
                }
                Matcher matcher = keyValue.matcher(item);
                if (!matcher.find()) {
                    throw new Exception("Invalid comment entry: " + item);
                }
                String value = matcher.group(2).trim();
                // "-" is treated the same as a missing value.
                if (!value.isEmpty() && !value.equals("-")) {
                    this.commentMap.put(matcher.group(1).trim(), value);
                }
            }
        }

        /**
         * Returns the IDs listed under the {@code medline} comment key.
         *
         * @return the parsed IDs, or an empty array when the key is absent
         * @throws NumberFormatException if a listed ID is not an integer
         */
        public int[] getMedlineIDIfAvailable() {
            return parseIdList("medline");
        }

        /**
         * Returns the IDs listed under the {@code species} comment key.
         *
         * @return the parsed IDs, or an empty array when the key is absent
         * @throws NumberFormatException if a listed ID is not an integer
         */
        public int[] getSpeciesIDIfAvailable() {
            return parseIdList("species");
        }

        /**
         * Parses the comma-separated integer list stored under {@code key}.
         * Whitespace around each ID is ignored and blank items are skipped, so
         * values such as {@code "123, 456"} are accepted.
         */
        private int[] parseIdList(String key) {
            String raw = this.commentMap.get(key);
            if (raw == null) {
                return new int[0];
            }
            List<Integer> ids = new ArrayList<Integer>();
            for (String token : raw.split(",")) {
                String trimmed = token.trim();
                if (!trimmed.isEmpty()) {
                    ids.add(Integer.valueOf(trimmed));
                }
            }
            int[] result = new int[ids.size()];
            for (int i = 0; i < result.length; i++) {
                result[i] = ids.get(i).intValue();
            }
            return result;
        }
    }

    /**
     * Converts the JASPAR flat files into motif library files.
     *
     * <p>Steps performed:
     * <ol>
     *   <li>Loads the count matrices (with a pseudocount), the species table and the
     *       matrix list entries.</li>
     *   <li>Builds one {@link SequenceMotif} per matrix that has a matrix list entry,
     *       assigning a single source species and a description.</li>
     *   <li>Exports all motifs to {@code JASPAR_motifs.txt} and, for every species
     *       with at least 10 motifs, a per-species file.</li>
     *   <li>Writes {@code JASPAR_extracted_references.txt}, downloading the abstract
     *       for every Medline ID referenced by a motif.</li>
     * </ol>
     *
     * <p>Taxon IDs that are missing from the species table are reported on stdout
     * and otherwise ignored when choosing the species.
     *
     * @throws Exception if an input file is invalid, a motif name is too long, a
     *                   multi-species motif contains neither Homo sapiens nor Mus
     *                   musculus, or a file/network operation fails
     */
    public static void convertFiles() throws Exception {
        Map<String, double[][]> nameToBaseCount = getNameToBaseCount(true);
        Map<Integer, String[]> speciesInfo = getSpeciesInfo();
        Map<String, JasparEntry> nameToJasparEntry = getNameToJasparEntry();

        List<ScorableSeq> motifs = new ArrayList<ScorableSeq>();
        Map<Integer, Set<String>> medlineToMotifNames = new HashMap<Integer, Set<String>>();
        Map<Species, Integer> motifsPerSpecies = new HashMap<Species, Integer>();
        int multiSpeciesCount = 0;

        for (Map.Entry<String, double[][]> matrix : nameToBaseCount.entrySet()) {
            String matrixName = matrix.getKey();
            // The matrix list is keyed by the first whitespace-delimited token of the header.
            String matrixId = matrixName.split("\\s+")[0];
            JasparEntry jasparEntry = nameToJasparEntry.get(matrixId);
            if (jasparEntry == null) {
                System.out.println("NOT FOUND: " + matrixId);
                continue;
            }
            String motifName = "[JP] " + matrixName;

            for (int medlineId : jasparEntry.getMedlineIDIfAvailable()) {
                Integer key = Integer.valueOf(medlineId);
                Set<String> names = medlineToMotifNames.get(key);
                if (names == null) {
                    names = new HashSet<String>();
                    medlineToMotifNames.put(key, names);
                }
                names.add(motifName);
            }

            List<String> speciesNames = resolveSpeciesNames(jasparEntry.getSpeciesIDIfAvailable(), speciesInfo);
            String speciesList = "";
            String speciesName;
            if (speciesNames.isEmpty()) {
                speciesName = "Other";
            } else if (speciesNames.size() == 1) {
                speciesName = speciesNames.get(0);
            } else {
                // Only one species can be kept: prefer human, then mouse.
                StringBuilder joined = new StringBuilder();
                boolean hasHuman = false;
                boolean hasMouse = false;
                for (int k = 0; k < speciesNames.size(); k++) {
                    String trimmed = speciesNames.get(k).trim();
                    if (k > 0) {
                        joined.append(", ");
                    }
                    joined.append(trimmed);
                    if (trimmed.equalsIgnoreCase("Homo sapiens")) {
                        hasHuman = true;
                    }
                    if (trimmed.equalsIgnoreCase("Mus musculus")) {
                        hasMouse = true;
                    }
                }
                speciesList = joined.toString();
                if (hasHuman) {
                    speciesName = "Homo sapiens";
                } else if (hasMouse) {
                    speciesName = "Mus musculus";
                } else {
                    throw new Exception("Invalid multi-species group (need to add code)");
                }
            }

            Species species = Species.getSpecies(speciesName);
            if (species == Species.OTHER && !speciesName.equals("Other")) {
                // A named species that the Species lookup mapped to OTHER.
                System.out.println(speciesName + "\t" + species.getFullName());
            }
            if (motifName.length() > StaticSettings.MAX_NAME_SIZE) {
                throw new Exception("NAME TOO LONG: " + motifName);
            }

            String description = "JASPAR database comment line: " + jasparEntry.commentLine;
            if (speciesNames.size() > 1) {
                multiSpeciesCount++;
                description = "Motif entry was assigned to more than one species: " + speciesList + ". (Only one is kept in MochiView.) " + description;
            }
            if (description.length() > StaticSettings.MAX_DESC_SIZE) {
                System.out.println("TOO LONG: " + description);
                description = description.substring(0, StaticSettings.MAX_DESC_SIZE - 3) + "...";
            }

            SequenceMotif sequenceMotif = new SequenceMotif(motifName, SequenceMotif.createFrequencyMatrix(matrix.getValue(), false), null);
            SeqMotifAnno annotation = new SeqMotifAnno();
            annotation.setSourceSpecies(species);
            annotation.setDescription(description);
            sequenceMotif.setOptionalAnnotation(annotation);
            motifs.add(sequenceMotif);

            Integer previous = motifsPerSpecies.get(species);
            motifsPerSpecies.put(species, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
        }

        // Order by species name first, then by motif name within a species.
        Collections.sort(motifs, new Comparator<ScorableSeq>() {
            @Override
            public int compare(ScorableSeq first, ScorableSeq second) {
                Species firstSpecies = first.getOptionalAnnotation().getSourceSpecies();
                Species secondSpecies = second.getOptionalAnnotation().getSourceSpecies();
                if (firstSpecies != secondSpecies) {
                    return firstSpecies.getCompleteName().compareTo(secondSpecies.getCompleteName());
                }
                return first.getName().compareTo(second.getName());
            }
        });

        ExportMotifFrequenciesFlatfile.exportMotifs(motifs, new File(outFileDir + "JASPAR_motifs.txt"), getHeader(multiSpeciesCount, motifs, motifsPerSpecies, null));
        for (Map.Entry<Species, Integer> perSpecies : motifsPerSpecies.entrySet()) {
            // Per-species files are only produced for species with at least 10 motifs.
            if (perSpecies.getValue().intValue() >= 10) {
                Species current = perSpecies.getKey();
                ExportMotifFrequenciesFlatfile.exportMotifs(getMotifs(motifs, current), new File(outFileDir + "JASPAR_motifs_" + current.getUnderscoredName() + ".txt"), getHeader(multiSpeciesCount, motifs, motifsPerSpecies, current));
            }
        }

        writeReferenceFile(medlineToMotifNames);
    }

    /**
     * Maps taxon IDs to species names (third element of the species table row).
     * IDs that are not in the table are reported on stdout and skipped, so the
     * returned list never contains null.
     */
    private static List<String> resolveSpeciesNames(int[] taxonIds, Map<Integer, String[]> speciesInfo) {
        List<String> names = new ArrayList<String>(taxonIds.length);
        for (int taxonId : taxonIds) {
            String[] info = speciesInfo.get(Integer.valueOf(taxonId));
            if (info == null) {
                System.out.println("TAXON ID NOT FOUND: " + taxonId);
            } else {
                names.add(info[2]);
            }
        }
        return names;
    }

    /**
     * Writes JASPAR_extracted_references.txt: for each Medline ID a banner listing
     * the (sorted) motif names that cite it, followed by the downloaded abstract.
     * The writer is closed even when a download or write fails.
     */
    private static void writeReferenceFile(Map<Integer, Set<String>> medlineToMotifNames) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFileDir + "JASPAR_extracted_references.txt")));
        try {
            boolean firstRecord = true;
            for (Map.Entry<Integer, Set<String>> reference : medlineToMotifNames.entrySet()) {
                Integer medlineId = reference.getKey();
                List<String> motifNames = new ArrayList<String>(reference.getValue());
                Collections.sort(motifNames);
                if (!firstRecord) {
                    writer.newLine();
                }
                firstRecord = false;
                writer.write("----------------------------------------------\n");
                writer.write("# MEDLINE ID: " + medlineId + "\n");
                for (String motifName : motifNames) {
                    writer.write("#    MOTIF: " + motifName + "\n");
                }
                writer.write("----------------------------------------------\n");
                System.out.println("Extracting reference for medline ID: " + medlineId);
                appendMedlineAbstract(writer, medlineId);
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Downloads the plain-text abstract for one Medline ID and appends it to the
     * writer. Leading empty lines are dropped and a leading "1: " is stripped from
     * the first non-empty line.
     */
    private static void appendMedlineAbstract(BufferedWriter writer, Integer medlineId) throws IOException {
        try {
            // NCBI E-utilities are documented at an https:// base URL.
            InputStream stream = new URL("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + medlineId + "&retmode=text&rettype=abstract").openStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
            try {
                boolean contentStarted = false;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (!contentStarted) {
                        if (line.isEmpty()) {
                            continue;
                        }
                        line = line.replaceAll("^1: ", "");
                        contentStarted = true;
                    }
                    writer.write(line);
                    writer.newLine();
                }
            } finally {
                // Closing the reader also closes the underlying stream.
                reader.close();
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            throw new IOException("Could not download file.  Maybe it no longer exists?", e);
        }
    }

    /**
     * Reads the JASPAR matrix list file and indexes its entries by name.
     *
     * <p>Blank lines are skipped; every other line must be a valid
     * {@link JasparEntry} line.
     *
     * @return a map from entry name to parsed entry
     * @throws Exception if the file cannot be read, a line is invalid, or two
     *                   lines share the same name
     */
    public static Map<String, JasparEntry> getNameToJasparEntry() throws Exception {
        Map<String, JasparEntry> entriesByName = new HashMap<String, JasparEntry>();
        // JasparEntry is a non-static inner class, so creating one from this
        // static method requires an enclosing JasparParser instance.
        JasparParser enclosing = new JasparParser();
        BufferedReader reader = new BufferedReader(new FileReader(jFileList));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                JasparEntry entry = enclosing.new JasparEntry(line);
                if (entriesByName.containsKey(entry.name)) {
                    throw new Exception("Duplicate name: " + entry.name);
                }
                entriesByName.put(entry.name, entry);
            }
        } finally {
            // Release the file handle on the error paths as well.
            reader.close();
        }
        return entriesByName;
    }

    /**
     * Reads the species list file and indexes it by numeric taxon ID.
     *
     * <p>Only lines matching {@code <code> <single char> <digits>: N=<name>} are
     * used; all other lines are silently skipped. Each map value is the array
     * {@code {code, single char, name}}.
     *
     * @return a map from taxon ID to the three captured fields
     * @throws Exception if the file cannot be read or a taxon ID does not fit in an int
     */
    public static Map<Integer, String[]> getSpeciesInfo() throws Exception {
        Map<Integer, String[]> infoByTaxonId = new HashMap<Integer, String[]>();
        Pattern speciesLine = Pattern.compile("^(\\w+)\\s+(\\w)\\s+(\\d+): N=(.*)$");
        BufferedReader reader = new BufferedReader(new FileReader(spFile));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = speciesLine.matcher(line);
                if (matcher.find()) {
                    Integer taxonId = Integer.valueOf(matcher.group(3));
                    infoByTaxonId.put(taxonId, new String[] {matcher.group(1), matcher.group(2), matcher.group(4)});
                }
            }
        } finally {
            // Release the file handle even if reading or parsing fails.
            reader.close();
        }
        return infoByTaxonId;
    }

    /**
     * Reads the JASPAR matrix file into per-motif count matrices.
     *
     * <p>Every line starting with {@code >} is a header; the text after the
     * {@code >} is the motif name. The four lines that follow must be the A, C, G
     * and T count rows, in that order and of equal length. Lines outside such a
     * record are ignored. The four rows are passed through
     * {@link MotifUtilities#transposeMatrix} before being stored.
     *
     * @param z when true, 1 is added to every count (pseudocount)
     * @return a map from motif name to its transposed count matrix
     * @throws Exception if the file cannot be read, a record is truncated, a row is
     *                   malformed, the rows differ in length, or a name is repeated
     */
    public static Map<String, double[][]> getNameToBaseCount(boolean z) throws Exception {
        final String[] bases = {"A", "C", "G", "T"};
        Map<String, double[][]> countsByName = new HashMap<String, double[][]>();
        BufferedReader reader = new BufferedReader(new FileReader(jFileMatrix));
        try {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String header = rawLine.trim();
                if (!header.startsWith(">")) {
                    continue;
                }
                double[][] rows = new double[bases.length][];
                for (int b = 0; b < bases.length; b++) {
                    String row = reader.readLine();
                    if (row == null) {
                        // The file ended in the middle of a record.
                        throw new Exception("Unexpected end of file in matrix: " + header);
                    }
                    rows[b] = parseCountLine(row, bases[b], z);
                }
                for (int b = 1; b < rows.length; b++) {
                    if (rows[b].length != rows[0].length) {
                        throw new Exception("Invalid matrix lengths..." + header);
                    }
                }
                String name = header.substring(1);
                if (countsByName.containsKey(name)) {
                    throw new Exception("Name already exists: " + name);
                }
                countsByName.put(name, MotifUtilities.transposeMatrix(rows));
            }
        } finally {
            // Release the file handle on the error paths as well.
            reader.close();
        }
        return countsByName;
    }

    /**
     * Parses one base-count row such as {@code A  [ 1 2 3 ]}.
     *
     * <p>Square brackets are removed, surrounding whitespace is ignored and the
     * remainder is split on whitespace. The first token must equal the expected
     * base label; the remaining tokens are the counts.
     *
     * @param str  the raw row, or null if the file ended early
     * @param str2 the base label the row must start with
     * @param z    when true, 1 is added to every count (pseudocount)
     * @return the counts in file order
     * @throws Exception             if the row is missing or starts with a different label
     * @throws NumberFormatException if a count is not a number
     */
    private static double[] parseCountLine(String str, String str2, boolean z) throws Exception {
        if (str == null) {
            throw new Exception("INVALID LINE: missing row for base " + str2);
        }
        String cleaned = str.replaceAll("[\\[\\]]", "");
        // Trim first: a leading blank would otherwise yield an empty first token.
        String[] tokens = cleaned.trim().split("\\s+");
        if (!tokens[0].equals(str2)) {
            throw new Exception("INVALID LINE: " + cleaned);
        }
        double[] counts = new double[tokens.length - 1];
        for (int i = 1; i < tokens.length; i++) {
            double value = Double.parseDouble(tokens[i]);
            counts[i - 1] = z ? value + 1.0d : value;
        }
        return counts;
    }

    /**
     * Returns, in their original order, the motifs whose annotated source species
     * is the given species (compared by identity).
     *
     * @param list    the motifs to filter
     * @param species the species to keep
     * @return a new list holding only the matching motifs
     */
    private static List<ScorableSeq> getMotifs(List<ScorableSeq> list, Species species) {
        List<ScorableSeq> matching = new ArrayList<ScorableSeq>();
        Iterator<ScorableSeq> cursor = list.iterator();
        while (cursor.hasNext()) {
            ScorableSeq candidate = cursor.next();
            if (candidate.getOptionalAnnotation().getSourceSpecies() != species) {
                continue;
            }
            matching.add(candidate);
        }
        return matching;
    }

    /**
     * Builds the '#'-prefixed comment header placed at the top of an exported
     * motif file.
     *
     * @param i       number of motifs that were assigned to more than one species;
     *                the explanatory note is only included when this is positive
     * @param list    all motifs; its size is reported when {@code species} is null
     * @param map     motif count per species; every species in it is listed
     * @param species the species the file is restricted to, or null for the full
     *                library (in which case the total motif count is reported)
     * @return the header text, ending with a blank line
     */
    private static String getHeader(int i, List<ScorableSeq> list, Map<Species, Integer> map, Species species) {
        int motifTotal;
        if (species != null) {
            motifTotal = map.get(species).intValue();
        } else {
            motifTotal = list.size();
        }
        NumberFormat fmt = NumberFormat.getInstance();
        StringBuilder header = new StringBuilder();

        header.append("# ").append(fmt.format(motifTotal)).append(" motifs\n");
        header.append("#\n");
        header.append("# This library was provided courtesy of Boris Lenhard and the JASPAR database (http://jaspar.cgb.ki.se/) on October 15, 2009.\n");
        header.append("#\n");
        header.append("# The motifs were parsed from the files available on the JASPAR ftp server:\n");
        header.append("#    [1] http://jaspar.genereg.net/html/DOWNLOAD/all_data/matrix_only/matrix_only.txt\n");
        header.append("#    [2] http://jaspar.genereg.net/html/DOWNLOAD/all_data/FlatFileDir/matrix_list.txt\n");
        header.append("# A pseudocount was added to each count in the base count matrix prior to conversion to a frequency matrix.\n");

        // The multi-species note is only relevant when such motifs were seen.
        if (i > 0) {
            header.append("# Note that JASPAR allows multiple species to be assigned to a motif, whereas MochiView does not.\n");
            header.append("# In the ").append(fmt.format(i)).append(" cases where this occurred, the species list is provided in the description and precedence\n");
            header.append("# was given to Homo sapiens and then Mus musculus (this covered all cases).\n");
        }
        header.append("#\n");

        header.append("# ").append(fmt.format(map.keySet().size())).append(" species are represented in the JASPAR library");
        if (species != null) {
            header.append(" (this file only includes entries assigned to ").append(species.getCompleteName()).append(")");
        }
        header.append(":\n");

        // Per-species motif counts, in the order produced by Species.sortByName.
        ArrayList<Species> orderedSpecies = new ArrayList<Species>(map.keySet());
        Species.sortByName(orderedSpecies);
        for (Species listed : orderedSpecies) {
            header.append("#    ").append(listed.getCompleteName()).append(": ").append(fmt.format(map.get(listed))).append(" motif(s)\n");
        }

        header.append("#\n");
        header.append("# JASPAR: an open-access database for eukaryotic transcription factor binding profiles\n");
        header.append("#    Nucleic Acids Res. 2004 Jan 1; 32(Database issue):D91-4\n");
        header.append("#    Sandelin A, Alkema W, Engstrom P, Wasserman WW, Lenhard B.\n");
        header.append("# JASPAR, the open access database of transcription factor-binding profiles: new content and tools in the 2008 update\n");
        header.append("#    Bryne JC, Valen E, Tang MH, Marstrand T, Winther O, da Piedade I, Krogh A, Lenhard B, Sandelin A.\n");
        header.append("#    Nucleic Acids Res. 2008 Jan;36(Database issue):D102-6. Epub 2007 Nov 15.\n");
        header.append("#\n");
        header.append("# Medline IDs for individual motifs can be found in the description lines of each motif. Full abstracts\n");
        header.append("#  and references for these IDs (matched to motif name) are available on the MochiView website.\n");
        header.append("#\n");
        header.append("\n");
        return header.toString();
    }

    /**
     * Command-line entry point: runs the conversion and prints the stack trace of
     * any failure to standard error.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        try {
            JasparParser.convertFiles();
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
