reasoning_loading_bar / rnn_model.py
royeis's picture
Add app.py
1229c85
import os
import json
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from huggingface_hub import PyTorchModelHubMixin
from huggingface_hub import snapshot_download
# Model configuration constants
# Feature width passed as the GRU ``input_size`` by the models in this file;
# set to the hidden dimension size of DeepSeek-R1-Distill-Qwen-32B.
D_FEATURES = 5120
class RNNSeqRegressor(nn.Module):
    """GRU-based regressor that emits one scalar per timestep.

    A (possibly multi-layer) GRU consumes a padded batch of feature
    sequences and a linear head maps every hidden state to a single value.

    Args:
        hidden: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        d_features: Width of each input feature vector. ``None`` (the
            default) falls back to the module-level ``D_FEATURES`` constant,
            which preserves the original behaviour.
    """

    def __init__(self, hidden=128, num_layers=1, d_features=None):
        super().__init__()
        # Resolve the default lazily so the constant is only needed when the
        # caller does not provide an explicit feature width.
        if d_features is None:
            d_features = D_FEATURES
        self.rnn = nn.GRU(
            input_size=d_features,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
        )
        self.head = nn.Linear(hidden, 1)  # scalar per step

    def forward(self, x_pad, lengths):
        """Run the GRU over a padded batch and regress a scalar per step.

        Args:
            x_pad: Padded inputs of shape ``(B, L, d_features)``.
            lengths: True length of each sequence in the batch; a tensor on
                any device or a plain Python sequence of ints.

        Returns:
            Tensor of shape ``(B, L)``. Positions past a sequence's true
            length are computed from all-zero hidden states (so they equal
            the head bias) and should be masked out by the caller.
        """
        # pack_padded_sequence needs lengths on the CPU; as_tensor also lets
        # callers pass a list instead of a tensor.
        lengths_cpu = torch.as_tensor(lengths, dtype=torch.int64, device="cpu")
        packed = pack_padded_sequence(
            x_pad, lengths_cpu,
            batch_first=True, enforce_sorted=False,
        )
        packed_out, _ = self.rnn(packed)
        # total_length keeps the output as wide as x_pad even when the longest
        # sequence in this batch is shorter than the padded width; without it
        # the result would shrink to max(lengths) and stop lining up with
        # targets/masks built for x_pad.
        h_seq, _ = pad_packed_sequence(
            packed_out, batch_first=True, total_length=x_pad.size(1)
        )  # (B, L, hidden)
        return self.head(h_seq).squeeze(-1)  # (B, L)
class RNNSeqRegressorHub(nn.Module, PyTorchModelHubMixin):
    """
    GRU-based sequence regressor with Hugging Face Hub integration.

    This model processes sequences of features and outputs a scalar prediction
    for each timestep, designed for thinking progress regression tasks.

    Args:
        hidden: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        d_features: Width of each input feature vector. ``None`` (the
            default) falls back to the module-level ``D_FEATURES`` constant.
        **kwargs: Ignored. Accepted so that extra keys stored in
            ``config.json`` (``model_type``, ``architecture``) can be passed
            straight to the constructor when loading.
    """

    def __init__(self, hidden=5120, num_layers=1, d_features=None, **kwargs):
        super().__init__()
        # Honour the feature width recorded in a saved config instead of
        # silently discarding it; default to the module constant otherwise.
        if d_features is None:
            d_features = D_FEATURES
        self.hidden = hidden
        self.num_layers = num_layers
        self.d_features = d_features

        self.rnn = nn.GRU(
            input_size=d_features,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
        )
        self.head = nn.Linear(hidden, 1)  # scalar per step

    def forward(self, x_pad, lengths):
        """
        Forward pass through the model.

        Args:
            x_pad: Padded input sequences of shape
                (batch_size, max_length, d_features)
            lengths: Actual lengths of sequences in the batch; a tensor on
                any device or a plain Python sequence of ints

        Returns:
            predictions: Scalar predictions for each timestep
                (batch_size, max_length). Positions past a sequence's true
                length come from all-zero hidden states and should be masked
                out by the caller.
        """
        # pack_padded_sequence needs lengths on the CPU; as_tensor also lets
        # callers pass a list instead of a tensor.
        lengths_cpu = torch.as_tensor(lengths, dtype=torch.int64, device="cpu")
        packed = pack_padded_sequence(
            x_pad, lengths_cpu,
            batch_first=True, enforce_sorted=False,
        )
        packed_out, _ = self.rnn(packed)
        # total_length pins the output width to x_pad's padded width, so the
        # documented (batch_size, max_length) shape holds even when the
        # longest sequence in the batch is shorter than the padding.
        h_seq, _ = pad_packed_sequence(
            packed_out, batch_first=True, total_length=x_pad.size(1)
        )  # (B, L_max, hidden)
        return self.head(h_seq).squeeze(-1)  # (B, L_max)

    def _save_pretrained(self, save_directory):
        """
        Save model configuration and weights.

        Writes ``config.json`` (constructor arguments plus descriptive
        metadata) and ``pytorch_model.bin`` (the state dict) into
        ``save_directory``, creating the directory if it does not exist.

        Args:
            save_directory: Target directory (string or path-like).
        """
        os.makedirs(save_directory, exist_ok=True)

        # Save configuration
        config = {
            "hidden": self.hidden,
            "num_layers": self.num_layers,
            "d_features": self.d_features,
            "model_type": "rnn_seq_regressor",
            "architecture": "GRU",
        }
        config_path = os.path.join(save_directory, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        # Save weights
        model_path = os.path.join(save_directory, "pytorch_model.bin")
        torch.save(self.state_dict(), model_path)

    @classmethod
    def _from_pretrained(
        cls,
        model_id,
        revision=None,
        cache_dir=None,
        force_download=False,
        proxies=None,
        resume_download=False,
        local_files_only=False,
        token=None,
        map_location="cpu",
        strict=True,
        **model_kwargs,
    ):
        """
        Load model from a local directory or a pretrained repo on the
        Hugging Face Hub.

        Args:
            model_id: Path to a local directory containing the saved files,
                or a Hub repo id.
            revision, cache_dir, force_download, proxies, resume_download,
            local_files_only, token: Forwarded unchanged to
                ``snapshot_download`` when ``model_id`` is not a local
                directory.
            map_location: Device mapping passed to ``torch.load``.
            strict: Passed to ``load_state_dict``.
            **model_kwargs: Constructor arguments; values found in
                ``config.json`` override them, since the config describes
                the architecture the stored weights belong to.

        Returns:
            The constructed model. If ``pytorch_model.bin`` is absent the
            model keeps its freshly initialised weights and a warning is
            emitted.
        """
        if os.path.isdir(model_id):
            # A directory written by save_pretrained: use it directly rather
            # than treating the path as a Hub repo id.
            snapshot_path = str(model_id)
        else:
            # Ensure we have a local directory with the model snapshot
            snapshot_path = snapshot_download(
                repo_id=model_id,
                revision=revision,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
                token=token,
            )

        # Load config
        config_path = os.path.join(snapshot_path, "config.json")
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)
            model_kwargs.update(config)

        # Create model instance
        model = cls(**model_kwargs)

        # Load weights
        weights_path = os.path.join(snapshot_path, "pytorch_model.bin")
        if os.path.exists(weights_path):
            # SECURITY: .bin checkpoints are pickle files and may come from an
            # untrusted repo; weights_only=True restricts unpickling to
            # tensors and plain containers, which is all a state dict needs.
            state_dict = torch.load(
                weights_path, map_location=map_location, weights_only=True
            )
            model.load_state_dict(state_dict, strict=strict)
        else:
            import warnings

            # Keep the original best-effort behaviour (return the model) but
            # make the missing checkpoint visible instead of silent.
            warnings.warn(
                f"No pytorch_model.bin found in {snapshot_path!r}; "
                "returning a model with randomly initialised weights.",
                stacklevel=2,
            )

        return model