Spaces:

nonzeroexit
/

AMP-Classifier

Running

App Files Files Community

nonzeroexit committed on Mar 7, 2025

Commit

81bcfb3

verified ·

1 Parent(s): 98a1e1e

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -28

app.py CHANGED Viewed

@@ -5,9 +5,11 @@ import pandas as pd
 from propy import AAComposition, Autocorrelation, CTD, PseudoAAC
 from sklearn.preprocessing import MinMaxScaler
 model = joblib.load("RF.joblib")
 scaler = joblib.load("norm (1).joblib")
 selected_features =  [
     "_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_PolarityC1",
     "_NormalizedVDWVC1", "_HydrophobicityC3", "_SecondaryStrT23", "_PolarizabilityD1001",
@@ -46,60 +48,66 @@ selected_features =  [
 def extract_features(sequence):
     try:
         comp_features = AAComposition.CalculateAAComposition(sequence)
         auto_features = Autocorrelation.CalculateAutoTotal(sequence)
         ctd_features = CTD.CalculateCTD(sequence)
-        pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
-        # Combine all features into a dictionary
-        all_features = {**comp_features, **auto_features, **ctd_features, **pseudo_features}
-        # Convert to DataFrame
-        all_features_df = pd.DataFrame([all_features])
-        # Ensure all required features are present
-        missing_features = [feat for feat in selected_features if feat not in all_features_df.columns]
-        if missing_features:
-            print(f"Warning: Missing features - {missing_features}")
-            for feat in missing_features:
-                all_features_df[feat] = 0  # Fill missing features with 0
-        # Select only required features
         all_features_df = all_features_df[selected_features]
-        # Normalize the features
         normalized_features = scaler.transform(all_features_df)
         return normalized_features
     except ZeroDivisionError:
-        print("Error: Division by zero encountered in Moran autocorrelation calculation.")
-        return None
     except Exception as e:
-        print(f"Feature extraction error: {e}")
-        return None
 def predict(sequence):
-    """Predict if the sequence is an AMP or not."""
     features = extract_features(sequence)
-    features = np.array(features).reshape(1, -1)  # Reshape for a single sample
     prediction = model.predict(features)[0]
     probabilities = model.predict_proba(features)[0]
-    prob_amp = probabilities[0]
-    prob_non_amp = probabilities[1]
-    return f"{prob_amp * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)" if prediction == 0 else f"{prob_non_amp * 100:.2f}% chance of being Non-AMP"
 iface = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(label="Enter Protein Sequence"),
     outputs=gr.Label(label="Prediction"),
     title="AMP Classifier",
-    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
 )
-iface.launch(share=True)

 from propy import AAComposition, Autocorrelation, CTD, PseudoAAC
 from sklearn.preprocessing import MinMaxScaler
+# Load the pre-trained model and scaler
 model = joblib.load("RF.joblib")
 scaler = joblib.load("norm (1).joblib")
+# Define the list of selected features (IMPORTANT: Keep this consistent with training)
 selected_features =  [
     "_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_PolarityC1",
     "_NormalizedVDWVC1", "_HydrophobicityC3", "_SecondaryStrT23", "_PolarizabilityD1001",
 def extract_features(sequence):
+    """Extracts features from a protein sequence and returns them as a NumPy array."""
     try:
+        # Calculate features from different ProPy modules
         comp_features = AAComposition.CalculateAAComposition(sequence)
         auto_features = Autocorrelation.CalculateAutoTotal(sequence)
         ctd_features = CTD.CalculateCTD(sequence)
+        pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)  # Use default parameters
+        # Combine all features into a single dictionary
+        all_features = {**comp_features, **auto_features, **ctd_features, **pseudo_features}
+        #print(len(all_features)) # debugging
+        # Convert to DataFrame, selecting only the required features
+        all_features_df = pd.DataFrame([all_features])
         all_features_df = all_features_df[selected_features]
+        # Normalize the features using the pre-fitted scaler
         normalized_features = scaler.transform(all_features_df)
         return normalized_features
     except ZeroDivisionError:
+        print("Error: Division by zero encountered in feature calculation.  Check your input sequence.")
+        return None  # Or handle appropriately
+    except KeyError as e:
+        print(f"Error: Missing feature {e}.  Check feature name consistency and ProPy version.")
+        return None # Or handle appropriately
     except Exception as e:
+        print(f"An unexpected error occurred during feature extraction: {e}")
+        return None  # Or handle appropriately
 def predict(sequence):
+    """Predicts whether the input sequence is an AMP and returns the prediction."""
     features = extract_features(sequence)
+    # Check if feature extraction was successful
+    if features is None:
+        return "Error: Could not extract features. Please check the input sequence."
+    # No need to reshape here; extract_features already returns the correct shape
     prediction = model.predict(features)[0]
     probabilities = model.predict_proba(features)[0]
+    # Determine output string based on prediction
+    if prediction == 0:
+        return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
+    else:
+        return f"{probabilities[1] * 100:.2f}% chance of being Non-AMP"
+# Gradio interface setup
 iface = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(label="Enter Protein Sequence"),
     outputs=gr.Label(label="Prediction"),
     title="AMP Classifier",
+    description="Enter an amino acid sequence (e.g., FLPVLAGGL) to predict whether it's an antimicrobial peptide (AMP) or not."
 )
+iface.launch(share=True)