<?xml version="1.0" encoding="UTF-8"?>
<s:scufl xmlns:s="http://org.embl.ebi.escience/xscufl/0.1alpha" version="0.2" log="0">
  <s:workflowdescription lsid="urn:lsid:www.mygrid.org.uk:operation:SSAVFCN82D1" author="Arnaud Kerhornou" title="Gene prediction workflow using geneid and SGP2. It then compares the predictions to reference annotations using gff2ps visualization tool. The reference annotations must be given as an input of the workflow." />
  <!-- Local Java worker (DecodeBase64). In this workflow its "base64" port is fed by
       the parsed image content of the runGFF2JPEG result, and its "bytes" port feeds
       the gene_predictions_maps workflow output. -->
  <s:processor name="Decode_base64_to_byte">
    <s:local>org.embl.ebi.escience.scuflworkers.java.DecodeBase64</s:local>
  </s:processor>
  <!-- String constant "content". It is linked to the "article name" port of
       String_1, String_2 and String_3. -->
  <s:processor name="StringArticleName" boring="true">
    <s:stringconstant>content</s:stringconstant>
  </s:processor>
  <!-- BeanShell processor that merges three BioMoby XML messages (inputs MobyA, MobyB,
       MobyC) into a single message holding one moby:Collection (output MobyOutput).
       In this workflow MobyA comes from runSGP2GFF, MobyB from runGeneIDGFF and MobyC
       from ReferenceGenePrediction_GFF; MobyOutput feeds runGFF2JPEG. -->
  <s:processor name="Combine_SGP2_GeneID_predictions">
    <s:beanshell>
      <s:scriptvalue>import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;

        // Skeleton of the output message: an empty moby:Collection that receives
        // the elements gathered from the three input messages.
        String skel="&lt;moby:MOBY xmlns='http://www.biomoby.org/moby' xmlns:moby='http://www.biomoby.org/moby'&gt;&lt;moby:mobyContent&gt;&lt;moby:mobyData queryID='one'&gt;&lt;moby:Collection&gt;&lt;/moby:Collection&gt;&lt;/moby:mobyData&gt;&lt;/moby:mobyContent&gt;&lt;/moby:MOBY&gt;";

        // The factory is left at its defaults, so element and attribute names are
        // matched below by their literal (possibly prefixed) names.
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();

        InputSource sourceA    = new InputSource(new StringReader(MobyA));
        InputSource sourceB    = new InputSource(new StringReader(MobyB));
        InputSource sourceC    = new InputSource(new StringReader(MobyC));
        InputSource sourceSkel = new InputSource(new StringReader(skel));

        // Inputs are processed in the order MobyA, MobyB, MobyC.
        Document[] inputs = {
                db.parse(sourceA),
                db.parse(sourceB),
                db.parse(sourceC)
        };
        Document docsalida = db.parse(sourceSkel);

        // Each element kind is looked up under its prefixed and unprefixed name.
        String[] simpleTags       = { "moby:Simple", "Simple" };
        String[] collectionTags   = { "moby:Collection", "Collection" };
        String[] articleNameAttrs = { "moby:articleName", "articleName" };

        // The single Collection element of the skeleton; everything is appended here.
        Node comienzo = docsalida.getElementsByTagName("moby:Collection").item(0);

        // Pass 1: copy every Simple that is NOT a direct child of a Collection.
        // Simples inside a Collection are skipped here because they travel with
        // their Collection in pass 2.
        for(int d=0; d&lt;inputs.length; d++) {
            for(int t=0; t&lt;simpleTags.length; t++) {
                NodeList simples = inputs[d].getElementsByTagName(simpleTags[t]);
                int maxi = simples.getLength();
                for(int i=0; i&lt;maxi; i++) {
                    Node found = simples.item(i);
                    String nname = found.getParentNode().getNodeName();

                    if(! (nname.equals("moby:Collection") || nname.equals("Collection"))) {

                        // Strip the articleName from the Simple: it belongs on the
                        // Collection now, and Taverna attaches it there itself.
                        // removeNamedItem raises a DOMException when the attribute is
                        // absent, so only remove the variants that are present.
                        for(int a=0; a&lt;articleNameAttrs.length; a++) {
                            if(found.getAttributes().getNamedItem(articleNameAttrs[a]) != null) {
                                found.getAttributes().removeNamedItem(articleNameAttrs[a]);
                            }
                        }

                        comienzo.appendChild(docsalida.importNode(found,true));
                    }
                }
            }
        }

        // Pass 2: copy every Collection element (deep copy) found in the inputs.
        // NOTE(review): this nests each input Collection inside the skeleton
        // Collection instead of flattening its children. Kept as in the original;
        // confirm that runGFF2JPEG expects this shape.
        for(int d=0; d&lt;inputs.length; d++) {
            for(int t=0; t&lt;collectionTags.length; t++) {
                NodeList collections = inputs[d].getElementsByTagName(collectionTags[t]);
                int maxi = collections.getLength();
                for(int i=0; i&lt;maxi; i++) {
                    comienzo.appendChild(docsalida.importNode(collections.item(i),true));
                }
            }
        }

        // Serialize the merged document and hand it to the output port.
        StringWriter sw = new StringWriter();
        XMLSerializer serializer = new XMLSerializer();
        serializer.setOutputCharStream(sw);
        serializer.serialize(docsalida);

       MobyOutput = sw.toString();</s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="'text/xml'">MobyA</s:beanshellinput>
        <s:beanshellinput s:syntactictype="'text/xml'">MobyB</s:beanshellinput>
        <s:beanshellinput s:syntactictype="'text/xml'">MobyC</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/xml'">MobyOutput</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>
  <!-- BioMoby service fromFASTAToDNASequence (authority genome.imim.es), first instance.
       Fed by FASTA_NA_1; its DNASequence output goes to runGeneIDGFF, runSGP2GFF and
       the "sequence" port of runWUTBlastx_2Seqs. -->
  <s:processor name="fromFASTAToDNASequence_1">
    <s:description>Converts a DNA FASTA sequence into a DNA sequence</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>fromFASTAToDNASequence</s:serviceName>
      <s:authorityName>genome.imim.es</s:authorityName>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby service fromFASTAToDNASequence (authority genome.imim.es), second instance.
       Fed by FASTA_NA_2; its DNASequence output goes to the "database" port of
       runWUTBlastx_2Seqs. -->
  <s:processor name="fromFASTAToDNASequence_2">
    <s:description>Converts a DNA FASTA sequence into a DNA sequence</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>fromFASTAToDNASequence</s:serviceName>
      <s:authorityName>genome.imim.es</s:authorityName>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby service runGeneIDGFF (authority genome.imim.es) with fixed secondary
       parameters. Input: DNA sequence from fromFASTAToDNASequence_1. Its GFF output
       goes to Combine_SGP2_GeneID_predictions (MobyB) and to Parse_Moby_Data_GFF1. -->
  <s:processor name="runGeneIDGFF">
    <s:description>Ab initio gene prediction tool that returns the gene predictions in GFF format (GFF version 2).</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>runGeneIDGFF</s:serviceName>
      <s:authorityName>genome.imim.es</s:authorityName>
      <s:Parameter s:name="strand">Both</s:Parameter>
      <s:Parameter s:name="profile">Homo sapiens (suitable for mammals)</s:Parameter>
      <s:Parameter s:name="engine">Normal</s:Parameter>
      <s:Parameter s:name="signals">None</s:Parameter>
      <s:Parameter s:name="exons">None</s:Parameter>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby service runSGP2GFF (authority genome.imim.es), profile "Human Vs Mouse".
       Inputs: DNA sequence from fromFASTAToDNASequence_1 and the BLAST-Text report
       from runWUTBlastx_2Seqs. Its GFF output goes to Combine_SGP2_GeneID_predictions
       (MobyA) and to Parse_Moby_Data_GFF. -->
  <s:processor name="runSGP2GFF">
    <s:description>Ab initio gene prediction service. It runs geneid with synteny evidences to improve the accuracy of the results and returns the output predictions in GFF format. To generate the synteny evidences, use a service that provides tblastx.</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>runSGP2GFF</s:serviceName>
      <s:authorityName>genome.imim.es</s:authorityName>
      <s:Parameter s:name="profile">Human Vs Mouse</s:Parameter>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby parser for datatype GFF, article name geneid_predictions. In this
       workflow it parses the runGeneIDGFF result; its 'content' output feeds the
       geneid_gff workflow output. -->
  <s:processor name="Parse_Moby_Data_GFF1">
    <s:description>Processor to parse the datatype GFF</s:description>
    <s:biomobyparser>
      <s:endpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:endpoint>
      <s:datatype>GFF</s:datatype>
      <s:articleName>geneid_predictions</s:articleName>
      <s:description>Processor to parse the datatype GFF</s:description>
    </s:biomobyparser>
  </s:processor>
  <!-- BioMoby parser for datatype b64_encoded_jpeg, article name image. In this
       workflow it parses the runGFF2JPEG result; its 'content' output feeds
       Decode_base64_to_byte. -->
  <s:processor name="Parse_Moby_Data_b64_encoded_jpeg">
    <s:description>Processor to parse the datatype b64_encoded_jpeg</s:description>
    <s:biomobyparser>
      <s:endpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:endpoint>
      <s:datatype>b64_encoded_jpeg</s:datatype>
      <s:articleName>image</s:articleName>
      <s:description>Processor to parse the datatype b64_encoded_jpeg</s:description>
    </s:biomobyparser>
  </s:processor>
  <!-- BioMoby service runWUTBlastx_2Seqs (authority inb.bsc.es) with fixed secondary
       parameters. The "sequence" port is fed by fromFASTAToDNASequence_1 and the
       "database" port by fromFASTAToDNASequence_2. Its BLAST-Text report goes to
       Parse_Moby_Data_BLAST_Text and to runSGP2GFF. -->
  <s:processor name="runWUTBlastx_2Seqs">
    <s:description>It compares the six-frame translations of a nucleotide query sequence against the six-frame translations of a nucleotide sequence (used as a database) using the Washington University BLAST algorithm.</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>runWUTBlastx_2Seqs</s:serviceName>
      <s:authorityName>inb.bsc.es</s:authorityName>
      <s:Parameter s:name="nogaps">off</s:Parameter>
      <s:Parameter s:name="gap_open">9</s:Parameter>
      <s:Parameter s:name="filter">none</s:Parameter>
      <s:Parameter s:name="statistics">sump</s:Parameter>
      <s:Parameter s:name="word_size">3</s:Parameter>
      <s:Parameter s:name="alignments">15</s:Parameter>
      <s:Parameter s:name="matrix">blosum62</s:Parameter>
      <s:Parameter s:name="expected_threshold">10.0</s:Parameter>
      <s:Parameter s:name="scores">25</s:Parameter>
      <s:Parameter s:name="gap_extension">2</s:Parameter>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby "String" object builder. Its value comes from the fasta_sequence_1
       workflow input and its article name from StringArticleName; its mobyData
       output feeds the String(content) port of FASTA_NA_1. -->
  <s:processor name="String_1">
    <s:description>a string</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>String</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- BioMoby service runGFF2JPEG (authority genome.imim.es). Its GFF collection input
       ('maps') is fed by Combine_SGP2_GeneID_predictions; its b64_encoded_jpeg output
       goes to Parse_Moby_Data_b64_encoded_jpeg. -->
  <s:processor name="runGFF2JPEG">
    <s:description>Runs gff2ps software to vizualize a set of GFF annotations maps. It returns an encoded image in JPEG format.</s:description>
    <s:biomobywsdl>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>runGFF2JPEG</s:serviceName>
      <s:authorityName>genome.imim.es</s:authorityName>
    </s:biomobywsdl>
  </s:processor>
  <!-- BioMoby "String" object builder. Its value comes from the fasta_sequence_2
       workflow input and its article name from StringArticleName; its mobyData
       output feeds the String(content) port of FASTA_NA_2. -->
  <s:processor name="String_2">
    <s:description>a string</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>String</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- BioMoby parser for datatype BLAST-Text, article name wutblastx_report. In this
       workflow it parses the runWUTBlastx_2Seqs result; its 'content' output feeds
       the tblastx_report workflow output. -->
  <s:processor name="Parse_Moby_Data_BLAST_Text">
    <s:description>Processor to parse the datatype BLAST-Text</s:description>
    <s:biomobyparser>
      <s:endpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:endpoint>
      <s:datatype>BLAST-Text</s:datatype>
      <s:articleName>wutblastx_report</s:articleName>
      <s:description>Processor to parse the datatype BLAST-Text</s:description>
    </s:biomobyparser>
  </s:processor>
  <!-- BioMoby "FASTA_NA" object builder. Its String(content) port is fed by String_2,
       its id by the identifier_2 workflow input and its namespace by the namespace
       workflow input; its mobyData output feeds fromFASTAToDNASequence_2. -->
  <s:processor name="FASTA_NA_2">
    <s:description>NA sequence in FASTA format</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>FASTA_NA</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- BioMoby parser for datatype GFF, article name geneid_predictions. In this
       workflow it parses the runSGP2GFF result (whose output article is also named
       geneid_predictions in the links); its 'content' output feeds the sgp2_gff
       workflow output. -->
  <s:processor name="Parse_Moby_Data_GFF">
    <s:description>Processor to parse the datatype GFF</s:description>
    <s:biomobyparser>
      <s:endpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:endpoint>
      <s:datatype>GFF</s:datatype>
      <s:articleName>geneid_predictions</s:articleName>
      <s:description>Processor to parse the datatype GFF</s:description>
    </s:biomobyparser>
  </s:processor>
  <!-- BioMoby "FASTA_NA" object builder. Its String(content) port is fed by String_1,
       its id by the identifier_1 workflow input and its namespace by the namespace
       workflow input; its mobyData output feeds fromFASTAToDNASequence_1. -->
  <s:processor name="FASTA_NA_1">
    <s:description>NA sequence in FASTA format</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>FASTA_NA</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- BioMoby "String" object builder. Its value comes from the
       gff_reference_gene_predictions workflow input and its article name from
       StringArticleName; its mobyData output feeds the String(content) port of
       ReferenceGenePrediction_GFF. -->
  <s:processor name="String_3">
    <s:description>a string</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>String</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- BioMoby "GFF" object builder for the reference annotations. Its String(content)
       port is fed by String_3, its id by the identifier_ref workflow input and its
       namespace by the namespace workflow input; its mobyData output feeds
       Combine_SGP2_GeneID_predictions (MobyC). -->
  <s:processor name="ReferenceGenePrediction_GFF">
    <s:description>Generic Feature Format File (of any subtype)</s:description>
    <s:biomobyobject>
      <s:mobyEndpoint>http://moby.ucalgary.ca/moby/MOBY-Central.pl</s:mobyEndpoint>
      <s:serviceName>GFF</s:serviceName>
      <s:authorityName />
    </s:biomobyobject>
  </s:processor>
  <!-- Data links. Each link connects a "source" to a "sink". An endpoint written as
       processor:port refers to a port of a processor defined above; a bare name refers
       to a workflow input (s:source) or workflow output (s:sink) declared below. -->
  <s:link source="fasta_sequence_1" sink="String_1:value" />
  <s:link source="fasta_sequence_2" sink="String_2:value" />
  <s:link source="Combine_SGP2_GeneID_predictions:MobyOutput" sink="runGFF2JPEG:GFF(Collection - 'maps')" />
  <s:link source="Parse_Moby_Data_b64_encoded_jpeg:image_'content'" sink="Decode_base64_to_byte:base64" />
  <s:link source="ReferenceGenePrediction_GFF:mobyData" sink="Combine_SGP2_GeneID_predictions:MobyC" />
  <s:link source="StringArticleName:value" sink="String_1:article name" />
  <s:link source="StringArticleName:value" sink="String_2:article name" />
  <s:link source="String_1:mobyData" sink="FASTA_NA_1:String(content)" />
  <s:link source="String_2:mobyData" sink="FASTA_NA_2:String(content)" />
  <s:link source="identifier_1" sink="FASTA_NA_1:id" />
  <s:link source="identifier_2" sink="FASTA_NA_2:id" />
  <s:link source="namespace" sink="FASTA_NA_1:namespace" />
  <s:link source="namespace" sink="FASTA_NA_2:namespace" />
  <s:link source="FASTA_NA_1:mobyData" sink="fromFASTAToDNASequence_1:FASTA_NA(sequence)" />
  <s:link source="FASTA_NA_2:mobyData" sink="fromFASTAToDNASequence_2:FASTA_NA(sequence)" />
  <s:link source="String_3:mobyData" sink="ReferenceGenePrediction_GFF:String(content)" />
  <s:link source="fromFASTAToDNASequence_1:DNASequence(sequence)" sink="runGeneIDGFF:DNASequence(sequence)" />
  <s:link source="fromFASTAToDNASequence_1:DNASequence(sequence)" sink="runSGP2GFF:DNASequence(sequence)" />
  <s:link source="fromFASTAToDNASequence_1:DNASequence(sequence)" sink="runWUTBlastx_2Seqs:NucleotideSequence(sequence)" />
  <s:link source="gff_reference_gene_predictions" sink="String_3:value" />
  <s:link source="identifier_ref" sink="ReferenceGenePrediction_GFF:id" />
  <s:link source="namespace" sink="ReferenceGenePrediction_GFF:namespace" />
  <s:link source="StringArticleName:value" sink="String_3:article name" />
  <s:link source="Decode_base64_to_byte:bytes" sink="gene_predictions_maps" />
  <s:link source="Parse_Moby_Data_BLAST_Text:wutblastx_report_'content'" sink="tblastx_report" />
  <s:link source="Parse_Moby_Data_GFF1:geneid_predictions_'content'" sink="geneid_gff" />
  <s:link source="Parse_Moby_Data_GFF:geneid_predictions_'content'" sink="sgp2_gff" />
  <s:link source="fromFASTAToDNASequence_2:DNASequence(sequence)" sink="runWUTBlastx_2Seqs:NucleotideSequence(database)" />
  <s:link source="runGFF2JPEG:b64_encoded_jpeg(image)" sink="Parse_Moby_Data_b64_encoded_jpeg:mobyData('b64_encoded_jpeg')" />
  <s:link source="runGeneIDGFF:GFF(geneid_predictions)" sink="Combine_SGP2_GeneID_predictions:MobyB" />
  <s:link source="runGeneIDGFF:GFF(geneid_predictions)" sink="Parse_Moby_Data_GFF1:mobyData('GFF')" />
  <s:link source="runSGP2GFF:GFF(geneid_predictions)" sink="Combine_SGP2_GeneID_predictions:MobyA" />
  <s:link source="runSGP2GFF:GFF(geneid_predictions)" sink="Parse_Moby_Data_GFF:mobyData('GFF')" />
  <s:link source="runWUTBlastx_2Seqs:BLAST-Text(wutblastx_report)" sink="Parse_Moby_Data_BLAST_Text:mobyData('BLAST-Text')" />
  <s:link source="runWUTBlastx_2Seqs:BLAST-Text(wutblastx_report)" sink="runSGP2GFF:BLAST-Text(tblastx_report)" />
  <!-- Workflow inputs.
       namespace: shared by FASTA_NA_1, FASTA_NA_2 and ReferenceGenePrediction_GFF.
       fasta_sequence_1 / identifier_1: content and id of the first FASTA object.
       fasta_sequence_2 / identifier_2: content and id of the second FASTA object.
       gff_reference_gene_predictions / identifier_ref: content and id of the reference
       GFF object. -->
  <s:source name="namespace" />
  <s:source name="fasta_sequence_1" />
  <s:source name="identifier_1" />
  <s:source name="fasta_sequence_2" />
  <s:source name="identifier_2" />
  <s:source name="identifier_ref" />
  <s:source name="gff_reference_gene_predictions">
    <s:metadata>
      <s:mimeTypes>
        <s:mimeType>text/xml</s:mimeType>
      </s:mimeTypes>
    </s:metadata>
  </s:source>
  <!-- Workflow outputs.
       sgp2_gff: parsed GFF content of the runSGP2GFF result.
       tblastx_report: parsed text content of the runWUTBlastx_2Seqs result.
       geneid_gff: parsed GFF content of the runGeneIDGFF result.
       gene_predictions_maps: decoded bytes of the image returned by runGFF2JPEG. -->
  <s:sink name="sgp2_gff" />
  <s:sink name="tblastx_report" />
  <s:sink name="geneid_gff" />
  <s:sink name="gene_predictions_maps">
    <s:metadata>
      <s:mimeTypes>
        <s:mimeType>image/*</s:mimeType>
      </s:mimeTypes>
    </s:metadata>
  </s:sink>
</s:scufl>

