import java.io.*;
import java.util.*;
import java.lang.*;
import java.lang.Math.*;
import java.text.*;
import java.lang.String.*;

/**
 * CAMEOligCategoryVariation - Determines the variation in ligand categories within a
 * binding site cluster.
 *
 * <p>On construction the class does two things:
 * <ol>
 *   <li>Loads the categorised ligand list from the classpath resource
 *       {@code /uniprotLigandListExtended_classified.txt}. The resource consists of five
 *       sections, read in this order, each terminated by a line starting with {@code //}:
 *       {@code #Metal}, {@code #Inorganic}, {@code #DNA/RNA}, {@code #Organic} and
 *       {@code #Peptides}. In the first four sections the ligand identifier is the text
 *       before the first {@code '='}; in the peptide section it is the whole line.
 *       Inorganic ligands are stored together with the metals (ion category).</li>
 *   <li>Reads the prediction file line by line until a line starting with {@code END}
 *       (or end of file). Every line containing {@code "All ligands clustered at site"}
 *       is split on {@code ':'} and {@code ';'}; the first two fields are ignored and the
 *       remaining fields are read as {@code NAME-COUNT} entries separated by commas and/or
 *       spaces. For each such line the fractions I (ions), O (organic), N (DNA/RNA) and
 *       P (peptides) of the total ligand count are computed, each between 0 and 1, and a
 *       formatted record is appended to the output buffer.</li>
 * </ol>
 *
 * <p>The constructor never throws: any failure is reported on standard output as
 * {@code "Error executing CAMEOligCategoryVariation!" + exception} and the values computed
 * so far are kept.
 *
 * <p>NOTE(review): {@code totalligs}, {@code numbligandtype} and the per-ligand probability
 * text accumulate over all site lines in one file, so with more than one site line the later
 * fractions are relative to the running total and the earlier ligand text is repeated.
 * This matches the historical behaviour; confirm whether input files can contain several
 * site lines before changing it.
 */
public class CAMEOligCategoryVariation
{
	/** Classpath location of the categorised ligand list. */
	private static final String LIGAND_CLASSIFICATION_RESOURCE = "/uniprotLigandListExtended_classified.txt";

	/** Marker identifying a line that lists the ligands clustered at a site. */
	private static final String SITE_LINE_MARKER = "All ligands clustered at site";

	/** Marker identifying a prediction file without any binding site. */
	private static final String NO_SITES_MARKER = "No binding sites predicted";

	/** Number of NAME-COUNT entries read so far (duplicated names are counted each time). */
	float numbligandtype = 0;
	/** Sum of all ligand counts read so far. */
	float totalligs = 0;
	/** Never assigned by this class; always 0 (the computation is disabled). */
	float categorydiversity = 0;
	/** Fraction of ligands in the ion category (metals and inorganic ligands). */
	double I = 0.000;
	/** Fraction of ligands in the organic category. */
	double O = 0.000;
	/** Fraction of ligands in the peptide category. */
	double P = 0.000;
	/** Fraction of ligands in the DNA/RNA category. */
	double N = 0.000;
	/** Formatted result records, one per processed site line. */
	StringBuffer OutputBuff = new StringBuffer();
	/** Formatter used for every fraction written to the output (three decimals). */
	DecimalFormat df = new DecimalFormat("0.000");

	/** Ligand identifiers from the #Metal and #Inorganic sections. */
	HashSet<String> metalLigsSet = new HashSet<String>();
	/** Kept for compatibility; never filled because inorganic ligands go into the metal set. */
	HashSet<String> inorganicLigsSet = new HashSet<String>();
	/** Ligand identifiers from the #DNA/RNA section. */
	HashSet<String> DNALigsSet = new HashSet<String>();
	/** Ligand identifiers from the #Organic section. */
	HashSet<String> organicLigsSet = new HashSet<String>();
	/** Ligand identifiers from the #Peptides section. */
	HashSet<String> peptideLigsSet = new HashSet<String>();

	/** One NAME-COUNT entry of a site line. */
	private static final class LigandCount
	{
		final String name;
		final float count;

		LigandCount( String name, float count )
		{
			this.name = name;
			this.count = count;
		}
	}

	/**
	 * Loads the ligand classification and scores the given prediction file.
	 *
	 * @param Fn_File path of the prediction file to read
	 */
	public CAMEOligCategoryVariation( String Fn_File )
	{
		try
		{
			loadLigandClassification();
			parsePredictionFile( Fn_File );
		}
		catch( Exception e )
		{
			// The constructor has always reported failures instead of throwing; callers rely on that.
			System.out.println( "Error executing CAMEOligCategoryVariation!" + e );
		}
	}

	/** Reads the five sections of the classification resource into the category sets. */
	private void loadLigandClassification() throws IOException
	{
		InputStream stream = CAMEOligCategoryVariation.class.getResourceAsStream( LIGAND_CLASSIFICATION_RESOURCE );
		if( stream == null )
		{
			// getResourceAsStream returns null for a missing resource; fail with a clear message.
			throw new FileNotFoundException( "Classpath resource not found: " + LIGAND_CLASSIFICATION_RESOURCE );
		}

		try( BufferedReader reader = new BufferedReader( new InputStreamReader( stream ) ) )
		{
			readLigandSection( reader, "#Metal", true, metalLigsSet );
			// Inorganic ligands are not a separate category; they are counted as ions.
			readLigandSection( reader, "#Inorganic", true, metalLigsSet );
			readLigandSection( reader, "#DNA/RNA", true, DNALigsSet );
			readLigandSection( reader, "#Organic", true, organicLigsSet );
			readLigandSection( reader, "#Peptides", false, peptideLigsSet );
		}
	}

	/**
	 * Reads one section, up to and including its terminating "//" line (or end of input),
	 * and adds every ligand identifier found to the target set.
	 *
	 * @param reader         source positioned at the start of the section
	 * @param header         prefix of the section header line, which is skipped
	 * @param hasDescription true when lines have the form ID=description
	 * @param target         set receiving the ligand identifiers
	 */
	private static void readLigandSection( BufferedReader reader, String header, boolean hasDescription, Set<String> target ) throws IOException
	{
		String line;
		while( (line = reader.readLine()) != null && !line.startsWith( "//" ) )
		{
			if( line.length() == 0 || line.startsWith( header ) )
			{
				continue;
			}
			// A line without '=' is taken as a bare identifier rather than aborting the load.
			int separator = hasDescription ? line.indexOf( '=' ) : -1;
			target.add( separator >= 0 ? line.substring( 0, separator ) : line );
		}
	}

	/** Scans the prediction file and scores every site line until "END" or end of file. */
	private void parsePredictionFile( String fileName ) throws IOException
	{
		StringBuffer bindingLigs = new StringBuffer();

		try( BufferedReader reader = new BufferedReader( new FileReader( fileName ) ) )
		{
			String line;
			while( (line = reader.readLine()) != null && !line.startsWith( "END" ) )
			{
				if( line.contains( NO_SITES_MARKER ) )
				{
					System.out.println( "No binding sites predicted" );
				}
				if( line.contains( SITE_LINE_MARKER ) )
				{
					scoreSite( line, bindingLigs );
				}
			}
		}
	}

	/**
	 * Updates the totals and the I/O/N/P fractions from one site line and appends the
	 * corresponding record to the output buffer.
	 *
	 * @param siteLine    line containing the site marker
	 * @param bindingLigs per-ligand probability text, shared by all site lines of the file
	 */
	private void scoreSite( String siteLine, StringBuffer bindingLigs )
	{
		List<LigandCount> ligands = parseLigandCounts( siteLine );

		// A repeated ligand name keeps only its last count here, but every entry adds to the total.
		Map<String, Float> countByLigand = new LinkedHashMap<String, Float>();
		for( LigandCount ligand : ligands )
		{
			numbligandtype++;
			totalligs += ligand.count;
			countByLigand.put( ligand.name, ligand.count );
		}

		bindingLigs.append( " | " );
		for( LigandCount ligand : ligands )
		{
			// Guard against 0/0, which would otherwise be formatted as NaN.
			float ligprob = totalligs != 0 ? ligand.count / totalligs : 0;
			bindingLigs.append( ligand.name + "=" + df.format( ligprob ) + "; " );
		}

		float metalligs = 0;
		float dnaligs = 0;
		float organicligs = 0;
		float peptideligs = 0;

		for( Map.Entry<String, Float> entry : countByLigand.entrySet() )
		{
			String ligand = entry.getKey();
			float count = entry.getValue();

			// The checks are independent: a ligand listed in several categories counts in each.
			if( metalLigsSet.contains( ligand ) )
			{
				metalligs += count;
			}
			if( DNALigsSet.contains( ligand ) )
			{
				dnaligs += count;
			}
			if( organicLigsSet.contains( ligand ) )
			{
				organicligs += count;
			}
			if( peptideLigsSet.contains( ligand ) )
			{
				peptideligs += count;
			}
		}

		I = fraction( metalligs, totalligs );
		N = fraction( dnaligs, totalligs );
		O = fraction( organicligs, totalligs );
		P = fraction( peptideligs, totalligs );

		OutputBuff.append(" | " + "I=" + df.format(I) + "; " +  "O=" + df.format(O) + "; " + "N=" + df.format(N) + "; " + "P=" +df.format(P) + ";" + bindingLigs.toString());
	}

	/**
	 * Extracts the NAME-COUNT entries of a site line in the order they appear.
	 *
	 * @throws IllegalArgumentException if an entry has no count or the count is not a number
	 */
	private static List<LigandCount> parseLigandCounts( String siteLine )
	{
		List<LigandCount> ligands = new ArrayList<LigandCount>();
		StringTokenizer fields = new StringTokenizer( siteLine, ":;" );

		// The first two fields (line label and centroid) carry no ligand counts.
		for( int skipped = 0; skipped < 2 && fields.hasMoreTokens(); skipped++ )
		{
			fields.nextToken();
		}

		while( fields.hasMoreTokens() )
		{
			StringTokenizer entries = new StringTokenizer( fields.nextToken(), ", " );
			while( entries.hasMoreTokens() )
			{
				String entry = entries.nextToken();
				StringTokenizer parts = new StringTokenizer( entry, "- " );
				if( parts.countTokens() < 2 )
				{
					throw new IllegalArgumentException( "Malformed ligand entry (expected NAME-COUNT): " + entry );
				}
				String name = parts.nextToken();
				float count = Float.parseFloat( parts.nextToken() );
				ligands.add( new LigandCount( name, count ) );
			}
		}
		return ligands;
	}

	/** Returns part/total (single-precision division) when both are usable, otherwise 0. */
	private static double fraction( float part, float total )
	{
		if( part > 0 && total != 0 )
		{
			return part / total;
		}
		return 0.000;
	}

	/** @return the sum of all ligand counts read */
	public float gettotalligs()
	{
		return totalligs;
	}

	/** @return the number of NAME-COUNT entries read */
	public float getnumbligandtype()
	{
		return numbligandtype;
	}

	/** @return the category diversity; never computed by this class, so always 0 */
	public float getcategorydiversity()
	{
		return categorydiversity;
	}

	/** @return the fraction of ligands in the ion category for the last site line processed */
	public double getI()
	{
		return I;
	}

	/** @return the fraction of ligands in the DNA/RNA category for the last site line processed */
	public double getN()
	{
		return N;
	}

	/** @return the fraction of ligands in the organic category for the last site line processed */
	public double getO()
	{
		return O;
	}

	/** @return the fraction of ligands in the peptide category for the last site line processed */
	public double getP()
	{
		return P;
	}

	/** @return the live buffer holding the formatted result records */
	public StringBuffer getOutputBuff()
	{
		return OutputBuff;
	}

	/** @return the live set of metal and inorganic ligand identifiers */
	public HashSet<String> getmetalLigsSet()
	{
		return metalLigsSet;
	}

	/** @return the live set of DNA/RNA ligand identifiers */
	public HashSet<String> getDNALigsSet()
	{
		return DNALigsSet;
	}

	/** @return the live set of organic ligand identifiers */
	public HashSet<String> getorganicLigsSet()
	{
		return organicLigsSet;
	}

	/** @return the live set of peptide ligand identifiers */
	public HashSet<String> getpeptideLigsSet()
	{
		return peptideLigsSet;
	}

	/**
	 * Command-line entry point: scores the prediction file named by the first argument
	 * and prints the formatted result records.
	 *
	 * @param args args[0] is the path of the prediction file
	 */
	public static void main( String args[])
	{
		if( args.length < 1 )
		{
			System.err.println( "Usage: java CAMEOligCategoryVariation <prediction-file>" );
			System.exit( 1 );
		}

		CAMEOligCategoryVariation cameolcv = new CAMEOligCategoryVariation( args[0] );
		System.out.println("Calculation global binding site biochemical propensity scores " );
		System.out.println( cameolcv.getOutputBuff().toString() );
	}
}
			