import java.io.*;
import java.util.*;
import java.lang.*;
import java.lang.Math.*;
import java.text.*;
import java.lang.String.*;

/**
*templateListParser - downloads the PDB entries named in a template list, keeps the templates that have biologically relevant ligands, and superposes them onto the top model
*/

public class templateListParser
{
	Runtime r = Runtime.getRuntime();
	Process p = null;

	// The following fields are not read or written anywhere in this class; they are
	// kept only because they are package-visible and may be referenced elsewhere.
	float relevanttemplatenumb = 0;
	float sumTMscore = 0;
	float bioreltemplates = 0;
	float meanTMscore = 0;
	float bioreltemplatescore = 0;
	float supnum = 0; 
	String template = "";

	// First token of every non-blank line of the template list file.
	Vector templateVect = new Vector();
	// Names ("<pdbid><chain>_lig") of templates that carry a relevant ligand and reach TM-score >= 0.4.
	Vector relevantTemplateVect = new Vector();
	
	/**
	 * Runs the whole pipeline: loads the relevant ligand codes, reads the template
	 * list, fetches each template's PDB entry into the output directory if it is
	 * not already there, writes a per-chain "_lig.pdb" file for templates that
	 * contain a relevant ligand, scores it against the top model with runTMalign
	 * and superposes the accepted templates with runTMalign2 and pymol.
	 * Any exception is reported on standard output and ends the processing; it is
	 * not propagated to the caller.
	 *
	 * @param outputdirectory directory that receives downloads and generated files
	 * @param teamplatelist   path of the template list; the first space-separated
	 *                        token of each line is the template id (4-character
	 *                        PDB id, optionally followed by a chain letter)
	 * @param top_model       model file name handed to runTMalign / runTMalign2 and
	 *                        copied to "<top_model>.pdb" inside the output directory
	 * @param target          prefix of the pymol output file "<target>_lig.pdb"
	 */
	public templateListParser( String outputdirectory, String teamplatelist, String top_model, String target )
	{
		try
		{
			HashSet relevantLigandSet = readRelevantLigands();
			readTemplateList( teamplatelist );

			for( int v = 0; v < templateVect.size(); v++ )
			{
				String templateId = (String)templateVect.elementAt( v );
				if( templateId.length() < 4 )
				{
					// too short to hold a PDB id; skip it instead of aborting the whole run
					System.err.println( "Skipping malformed template id: " + templateId );
					continue;
				}

				String templatePDB = templateId.substring( 0, 4 );
				// chain defaults to 'A' when the id carries no fifth character
				char chain = 'A';
				if( templateId.length() > 4 )
					chain = templateId.toUpperCase().charAt( 4 );

				File entFile = new File( outputdirectory + "/pdb" + templatePDB + ".ent" );
				if( !entFile.exists() )
					downloadTemplate( outputdirectory, templatePDB );

				// download may have failed; nothing to do for this template then
				if( !entFile.exists() )
					continue;

				String coords = extractChainCoords( entFile, chain, relevantLigandSet );
				if( coords != null )
				{
					//output coords to a new PDB file
					String ligFileName = templatePDB + chain + "_lig.pdb";
					writeText( outputdirectory + "/" + ligFileName, coords );

					//check TMscore with top model - only use templates with the same fold
					runTMalign rtm = new runTMalign( top_model, ligFileName, outputdirectory );
					float tm = rtm.getTMscore();
					if( tm >= 0.4 )
					{
						relevantTemplateVect.addElement( templatePDB + chain + "_lig" );
					}
				}

				// NOTE(review): as in the previous version, this step runs inside the
				// per-template loop. Because the pymol call is skipped once
				// "<target>_lig.pdb" exists, that file is produced at most once per run,
				// presumably with only the templates accepted up to that point.
				// Confirm whether it should instead run once after the loop.
				if( relevantTemplateVect.size() > 0 )
				{
					superposeRelevantTemplates( outputdirectory, top_model, target );
				}
			}
		}
		catch( InterruptedException e )
		{
			// restore the interrupt flag so callers can still observe the interruption
			Thread.currentThread().interrupt();
			System.out.println( "Error executing templateListParser!" + e );
		}
		catch( Exception e )
		{
			System.out.println( "Error executing templateListParser!" + e );
		}	
	}

	/**
	 * Loads the ligand codes from the classpath resource
	 * "/uniprotLigandListExtended_classified.txt". A code is the text before the
	 * first '=' of a line; empty lines, lines starting with "#" or "//" and lines
	 * without '=' are ignored.
	 *
	 * @return set of ligand codes (String)
	 * @throws IOException if the resource is missing or cannot be read
	 */
	private HashSet readRelevantLigands() throws IOException
	{
		HashSet relevantLigandSet = new HashSet();
		InputStream is1 = templateListParser.class.getResourceAsStream("/uniprotLigandListExtended_classified.txt");
		if( is1 == null )
			throw new FileNotFoundException( "Resource /uniprotLigandListExtended_classified.txt not found on classpath" );

		BufferedReader in1 = new BufferedReader(new InputStreamReader(is1));
		try
		{
			// NOTE(review): the first line has always been read and discarded here;
			// presumably it is a header - confirm against the resource file.
			in1.readLine();

			String ligline;
			while (null != (ligline = in1.readLine()))
			{
				if( ligline.length() == 0 || ligline.startsWith( "#" ) || ligline.startsWith( "//" ) )
					continue;

				int eq = ligline.indexOf( "=" );
				if( eq < 0 )
					continue; // no "code=..." pair on this line

				relevantLigandSet.add( ligline.substring( 0, eq ) );
			}
		}
		finally
		{
			in1.close();
		}
		return relevantLigandSet;
	}

	/**
	 * Appends the first space-separated token of every non-blank line of the
	 * given file to templateVect.
	 *
	 * @param teamplatelist path of the template list file
	 * @throws IOException if the file cannot be opened or read
	 */
	private void readTemplateList( String teamplatelist ) throws IOException
	{
		BufferedReader buff1 = new BufferedReader( new FileReader( teamplatelist ) );
		try
		{
			String line1;
			while( ( line1 = buff1.readLine() ) != null )
			{
				StringTokenizer templatetokes = new StringTokenizer( line1, " " );
				// blank lines (and an empty file) yield no token and are skipped
				if( templatetokes.hasMoreTokens() )
					templateVect.add( templatetokes.nextToken() );
			}
		}
		finally
		{
			buff1.close();
		}
	}

	/**
	 * Starts an external command, waits for it to finish and releases the process.
	 * The command is passed as an argument array so that arguments containing
	 * spaces are not split.
	 */
	private void runAndWait( String[] command ) throws IOException, InterruptedException
	{
		p = r.exec( command );
		try
		{
			p.waitFor();
		}
		finally
		{
			p.destroy();
		}
	}

	/**
	 * Fetches "pdb<id>.ent.gz" with wget into the output directory and unpacks it
	 * with gunzip. Failures of either tool are not detected here; the caller
	 * checks whether the ".ent" file exists afterwards.
	 */
	private void downloadTemplate( String outputdirectory, String templatePDB ) throws IOException, InterruptedException
	{
		// the two middle characters of the id select the sub-directory on the server
		String midstr = templatePDB.substring( 1, 3 );
		String gzPath = outputdirectory + "/pdb" + templatePDB + ".ent.gz";

		String[] wget = { "wget", "-q", "ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/pdb/" + midstr + "/pdb" + templatePDB + ".ent.gz", "-O", gzPath };
		runAndWait( wget );

		String[] gunzip = { "gunzip", gzPath };
		runAndWait( gunzip );
	}

	/**
	 * Collects, for one chain, every ATOM record and every HETATM record whose
	 * residue name (columns 18-20) is in the ligand set. Reading stops at the
	 * first line starting with "ENDMDL" or "MODEL        2".
	 *
	 * @return the collected records, or null when no relevant HETATM was found
	 */
	private String extractChainCoords( File entFile, char chain, HashSet relevantLigandSet ) throws IOException
	{
		StringBuffer coordsbuf = new StringBuffer();
		boolean relevant = false;

		BufferedReader in = new BufferedReader( new FileReader( entFile ) );
		try
		{
			String line = in.readLine();
			while( line != null && !line.startsWith( "ENDMDL" ) && !line.startsWith( "MODEL        2" ) )
			{
				boolean isAtom = line.startsWith( "ATOM" );
				boolean isHet = line.startsWith( "HETATM" );

				// length guard: truncated records have no chain column (index 21)
				if( ( isAtom || isHet ) && line.length() > 21 && line.charAt( 21 ) == chain )
				{
					if( isAtom )
					{
						coordsbuf.append( line + "\n" );
					}
					else if( relevantLigandSet.contains( line.substring( 17, 20 ).trim() ) )
					{
						coordsbuf.append( line + "\n" );
						relevant = true;
					}
				}
				line = in.readLine();
			}
		}
		finally
		{
			in.close();
		}
		return relevant ? coordsbuf.toString() : null;
	}

	/** Writes the text to the given path, replacing any existing file. */
	private void writeText( String path, String text ) throws IOException
	{
		DataOutputStream out = new DataOutputStream( new FileOutputStream( path ) );
		try
		{
			out.writeBytes( text );
		}
		finally
		{
			out.close();
		}
	}

	/**
	 * Copies the top model to "<top_model>.pdb", runs runTMalign2 for every entry
	 * of relevantTemplateVect, filters each resulting superposition file with
	 * parseSupFile and, unless "<target>_lig.pdb" already exists in the output
	 * directory, runs pymol to align the templates and save that file.
	 *
	 * Declared with "throws Exception" because the exceptions the project class
	 * runTMalign2 may throw are not visible from this file.
	 */
	private void superposeRelevantTemplates( String outputdirectory, String top_model, String target ) throws Exception
	{
		//fix "bug" with new version of pymol, which checks file extensions are .pdb annoyingly!
		String[] cpfix = {"/bin/bash", "-c", "cd " + outputdirectory + ";cp " + top_model + " " + top_model + ".pdb" };
		runAndWait( cpfix );

		String listreltemps = top_model + ".pdb ";
		String listrelsup = "";
		String aligncommands = "";

		for( int i = 0; i < relevantTemplateVect.size(); i++ )
		{
			String reltemplate = (String)relevantTemplateVect.elementAt( i );
			String supName = "TM.sup" + ( i + 1 );

			new runTMalign2( reltemplate + ".pdb", top_model, outputdirectory, supName );

			// The "/" separator was missing here, unlike every other path built from
			// outputdirectory in this class; a doubled slash is harmless if the
			// directory already ends with one.
			parseSupFile( outputdirectory + "/" + supName + "_all" );//output reorientated template CA coordinates as TM.sup*_all_filt.pdb

			listrelsup = listrelsup + supName + "_all_filt.pdb ";
			listreltemps = listreltemps + reltemplate + ".pdb ";
			aligncommands = aligncommands + "align "+ reltemplate + ", " + supName + "_all_filt; ";
		}

		if( !( new File( outputdirectory+"/"+target +"_lig.pdb") ).exists() )
		{
			String pymolcmd = "$PYMOL_HOME/pymol -c " + listreltemps + listrelsup + "-d '" + aligncommands + " save " + target +"_lig.pdb' > runpymol.out.log 2> runpymol.err.log";
			// run through bash so that $PYMOL_HOME and the redirections are interpreted
			String[] runpymol = {"/bin/bash", "-c", "cd " + outputdirectory + ";" + pymolcmd };
			System.out.println( "Aligning templates and ligands using pymol...\n" + pymolcmd );
			runAndWait( runpymol );
		}
	}

	/** Closes a stream if it was opened, reporting a failure on standard error. */
	private static void closeAndReport( Closeable c )
	{
		if( c == null )
			return;
		try
		{
			c.close();
		}
		catch( IOException e )
		{
			System.err.println( e );
		}
	}
	
	/**
	 * Copies the ATOM records of a superposition file, up to (not including) the
	 * first line starting with "TER", into "<supfilename>_filt.pdb".
	 * Errors are printed to standard error and not propagated.
	 *
	 * @param supfilename path of the superposition file to filter
	 */
	public void parseSupFile( String supfilename )
	{
		BufferedReader in = null;
		DataOutputStream out = null;
		try
		{
			in = new BufferedReader( new FileReader( supfilename ) );
			out = new DataOutputStream( new FileOutputStream( supfilename + "_filt.pdb" ) );

			String line = in.readLine();
			while( line != null && !line.startsWith( "TER" ) )
			{
				if( line.startsWith( "ATOM" ) )
				{
					out.writeBytes( line + "\n" );
				}
				line = in.readLine();
			}
		}
		catch( Exception e )
		{
			System.err.println( e );
		}
		finally
		{
			closeAndReport( in );
			closeAndReport( out );
		}
	}
	
	/** @return the template ids read from the template list (elements are String) */
	public Vector gettemplatevect()
	{
		return templateVect;
	}
	
	/** @return the names of the accepted templates, each of the form "<pdbid><chain>_lig" */
	public Vector getrelevantTemplateVect()
	{
		return relevantTemplateVect;
	}
	
	/**
	 * Command-line entry point.
	 * Arguments: output directory, template list file, top model, target name.
	 */
	public static void main( String args[])
	{
		if( args.length < 4 )
		{
			System.err.println( "Usage: java templateListParser <outputdirectory> <templatelist> <top_model> <target>" );
			return;
		}

		// all work happens in the constructor
		new templateListParser( args[0], args[1], args[2], args[3] );
	}
}
			
