Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,13 +49,10 @@ def extract_features(sequence):
|
|
| 49 |
|
| 50 |
all_features_dict = {}
|
| 51 |
|
| 52 |
-
# Calculate all
|
| 53 |
dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
|
| 54 |
-
|
| 55 |
-
# Add all dipeptide features
|
| 56 |
all_features_dict.update(dipeptide_features)
|
| 57 |
|
| 58 |
-
|
| 59 |
auto_features = Autocorrelation.CalculateAutoTotal(sequence)
|
| 60 |
all_features_dict.update(auto_features)
|
| 61 |
|
|
@@ -65,23 +62,20 @@ def extract_features(sequence):
|
|
| 65 |
pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
|
| 66 |
all_features_dict.update(pseudo_features)
|
| 67 |
|
| 68 |
-
# Convert
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
# Select features and handle missing columns
|
| 72 |
-
feature_df_selected = feature_df[selected_features].copy() # Use .copy() to avoid SettingWithCopyWarning
|
| 73 |
-
|
| 74 |
-
# Fill missing features with 0 (or another appropriate value)
|
| 75 |
-
feature_df_selected = feature_df_selected.fillna(0)
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
# Normalize the features
|
| 82 |
-
normalized_features = scaler.transform(feature_array)
|
| 83 |
|
| 84 |
-
return normalized_features
|
| 85 |
|
| 86 |
|
| 87 |
def predict(sequence):
|
|
|
|
| 49 |
|
| 50 |
all_features_dict = {}
|
| 51 |
|
| 52 |
+
# Calculate all features
|
| 53 |
dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
|
|
|
|
|
|
|
| 54 |
all_features_dict.update(dipeptide_features)
|
| 55 |
|
|
|
|
| 56 |
auto_features = Autocorrelation.CalculateAutoTotal(sequence)
|
| 57 |
all_features_dict.update(auto_features)
|
| 58 |
|
|
|
|
| 62 |
pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
|
| 63 |
all_features_dict.update(pseudo_features)
|
| 64 |
|
| 65 |
+
# Convert all features to DataFrame
|
| 66 |
+
feature_df_all = pd.DataFrame([all_features_dict])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
# Normalize ALL features
|
| 69 |
+
normalized_feature_array = scaler.transform(feature_df_all.values) # Normalize the numpy array
|
| 70 |
+
normalized_feature_df = pd.DataFrame(normalized_feature_array, columns=feature_df_all.columns) # Convert back to DataFrame with original column names
|
| 71 |
|
| 72 |
+
# Select features AFTER normalization
|
| 73 |
+
feature_df_selected = normalized_feature_df[selected_features].copy()
|
| 74 |
+
feature_df_selected = feature_df_selected.fillna(0) # Fill missing if any after selection (though unlikely now)
|
| 75 |
+
feature_array = feature_df_selected.values
|
| 76 |
|
|
|
|
|
|
|
| 77 |
|
| 78 |
+
return feature_array
|
| 79 |
|
| 80 |
|
| 81 |
def predict(sequence):
|