Update app.py
app.py (CHANGED)
Old version (removed): FastAPI app

from fastapi import FastAPI  # first line is not visible in the diff excerpt, but FastAPI() is used below
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

app = FastAPI()

# Load trained SVM model and scaler
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")

# List of features used in your model
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    # ... (the dipeptide features in between are collapsed in the diff view) ...
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]

# Define request model
class SequenceRequest(BaseModel):
    sequence: str

# Feature extraction function
def extract_features(sequence):
    """Extract only the required features and normalize them."""
    # Compute all possible features
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)
    # Extract the values from the dictionary
    feature_values = list(all_features.values())
    # Convert to NumPy array for normalization
    feature_array = np.array(feature_values).reshape(-1, 1)
    feature_array = feature_array[:420]
    # Min-Max Normalization
    normalized_features = scaler.transform(feature_array.T)
    # Reshape normalized_features back to a single dimension
    normalized_features = normalized_features.flatten()
    # Create a dictionary with the selected features
    selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features)
                             if feature in all_features}
    # Convert dictionary to dataframe
    selected_feature_df = pd.DataFrame([selected_feature_dict])
    # Convert dataframe to numpy array
    selected_feature_array = selected_feature_df.T.to_numpy()
    return selected_feature_array


# predict() was left unfinished in this revision (no parameter list or body beyond the docstring)
def predict(
    """Predict AMP vs Non-AMP"""


@app.get("/")
def read_root():
    return {"status": "OK"}
New version (added): Gradio app

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

# Load trained SVM model and scaler (ensure both files exist in the Space)
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")

# List of features used in your model
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    # ... (the dipeptide features in between are collapsed in the diff view) ...
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]


def extract_features(sequence):
    """Extract only the required features and normalize them."""
    # Compute all possible features (amino acid + dipeptide composition)
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)
    # Extract the values from the dictionary
    feature_values = list(all_features.values())
    # Convert to NumPy array for normalization
    feature_array = np.array(feature_values).reshape(-1, 1)
    feature_array = feature_array[:420]
    # Min-Max Normalization
    normalized_features = scaler.transform(feature_array.T)
    # Reshape normalized_features back to a single dimension
    normalized_features = normalized_features.flatten()
    # Create a dictionary with the selected features
    selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features)
                             if feature in all_features}
    # Convert dictionary to dataframe
    selected_feature_df = pd.DataFrame([selected_feature_dict])
    # Convert dataframe to numpy array
    selected_feature_array = selected_feature_df.T.to_numpy()
    return selected_feature_array


def predict(sequence):
    """Predict AMP vs Non-AMP"""
    features = extract_features(sequence)
    prediction = model.predict(features.T)[0]
    return "AMP" if prediction == 0 else "Non-AMP"


# Create Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
)

# Launch app
iface.launch(share=True)
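As a quick sanity check of the new version's prediction path (a hypothetical snippet: the example peptide sequence below is arbitrary, and it assumes SVM.joblib and norm.joblib sit next to app.py), predict() can be called directly before launching the interface:

# Hypothetical smoke test; the sequence is an arbitrary illustrative example
example_sequence = "GLFDIVKKVVGALGSL"
print(predict(example_sequence))  # prints "AMP" or "Non-AMP"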
I need to use the same code with FastAPI instead of the Gradio interface.
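One way to do that, as a minimal sketch rather than a drop-in answer: keep extract_features(), selected_features, model and scaler exactly as in the Gradio version above, and swap only the Gradio-specific parts for FastAPI routes. The SequenceRequest model and the "/" health-check route come from the removed version; the /predict route name and the response shape are assumptions for illustration.

# Sketch only: assumes extract_features(), selected_features, model and scaler
# from the code above are defined in the same module.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

# Request model carried over from the removed FastAPI version
class SequenceRequest(BaseModel):
    sequence: str

@app.get("/")
def read_root():
    # Health check, as in the removed version
    return {"status": "OK"}

@app.post("/predict")  # route name is illustrative
def predict_endpoint(request: SequenceRequest):
    # Same prediction path as the Gradio predict() function
    features = extract_features(request.sequence)
    prediction = model.predict(features.T)[0]
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}

The app could then be served with uvicorn (for example, uvicorn app:app --host 0.0.0.0 --port 7860, with 7860 being the port a Hugging Face Space typically expects from a custom server) and queried by POSTing JSON such as {"sequence": "..."} to /predict.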