import os
import json

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from huggingface_hub import PyTorchModelHubMixin, snapshot_download

# Model configuration constants
D_FEATURES = 5120  # Hidden dimension size for DeepSeek-R1-Distill-Qwen-32B
class RNNSeqRegressor(nn.Module):
    """GRU-based sequence regressor that predicts one scalar per timestep."""

    def __init__(self, hidden=128, num_layers=1):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=D_FEATURES,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
        )
        self.head = nn.Linear(hidden, 1)  # scalar per step

    def forward(self, x_pad, lengths):
        # Pack the padded batch so the GRU skips padding positions
        packed = pack_padded_sequence(
            x_pad, lengths.cpu(),
            batch_first=True, enforce_sorted=False,
        )
        h_seq, _ = self.rnn(packed)  # packed output
        h_seq, _ = pad_packed_sequence(h_seq, batch_first=True)  # (B, L_max, hidden)
        return self.head(h_seq).squeeze(-1)  # (B, L_max)
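
# Illustrative sketch (not part of the original module): run the base regressor
# on a dummy padded batch to show the expected tensor shapes. The batch size,
# sequence lengths, and function name here are assumptions for demonstration.
def _example_forward_pass():
    model = RNNSeqRegressor(hidden=128)
    x_pad = torch.randn(2, 7, D_FEATURES)  # (batch, max_len, D_FEATURES)
    lengths = torch.tensor([7, 4])         # true (unpadded) length of each sequence
    preds = model(x_pad, lengths)          # -> shape (2, 7), one scalar per step
    return preds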
class RNNSeqRegressorHub(nn.Module, PyTorchModelHubMixin):
    """
    GRU-based sequence regressor with Hugging Face Hub integration.

    This model processes sequences of features and outputs a scalar prediction
    for each timestep, designed for thinking progress regression tasks.
    """

    def __init__(self, hidden=5120, num_layers=1, **kwargs):
        super().__init__()
        self.hidden = hidden
        self.num_layers = num_layers
        self.d_features = D_FEATURES
        self.rnn = nn.GRU(
            input_size=D_FEATURES,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
        )
        self.head = nn.Linear(hidden, 1)  # scalar per step

    def forward(self, x_pad, lengths):
        """
        Forward pass through the model.

        Args:
            x_pad: Padded input sequences of shape (batch_size, max_length, d_features)
            lengths: Actual lengths of the sequences in the batch

        Returns:
            predictions: Scalar predictions for each timestep (batch_size, max_length)
        """
        packed = pack_padded_sequence(
            x_pad, lengths.cpu(),
            batch_first=True, enforce_sorted=False,
        )
        h_seq, _ = self.rnn(packed)  # packed output
        h_seq, _ = pad_packed_sequence(h_seq, batch_first=True)  # (B, L_max, hidden)
        return self.head(h_seq).squeeze(-1)  # (B, L_max)
    def _save_pretrained(self, save_directory):
        """
        Save the model configuration and weights to `save_directory`.
        """
        # Save configuration
        config = {
            "hidden": self.hidden,
            "num_layers": self.num_layers,
            "d_features": self.d_features,
            "model_type": "rnn_seq_regressor",
            "architecture": "GRU",
        }
        config_path = os.path.join(save_directory, "config.json")
        with open(config_path, "w") as f:
            json.dump(config, f, indent=2)

        # Save weights
        model_path = os.path.join(save_directory, "pytorch_model.bin")
        torch.save(self.state_dict(), model_path)
    @classmethod
    def _from_pretrained(
        cls,
        model_id,
        revision=None,
        cache_dir=None,
        force_download=False,
        proxies=None,
        resume_download=False,
        local_files_only=False,
        token=None,
        map_location="cpu",
        strict=True,
        **model_kwargs,
    ):
        """
        Load the model from a pretrained repo on the Hugging Face Hub.
        """
        # Ensure we have a local directory with the model snapshot
        snapshot_path = snapshot_download(
            repo_id=model_id,
            revision=revision,
            cache_dir=cache_dir,
            force_download=force_download,
            proxies=proxies,
            resume_download=resume_download,
            local_files_only=local_files_only,
            token=token,
        )

        # Load config (the extra keys are absorbed by __init__'s **kwargs)
        config_path = os.path.join(snapshot_path, "config.json")
        if os.path.exists(config_path):
            with open(config_path, "r") as f:
                config = json.load(f)
            model_kwargs.update(config)

        # Create model instance
        model = cls(**model_kwargs)

        # Load weights
        weights_path = os.path.join(snapshot_path, "pytorch_model.bin")
        if os.path.exists(weights_path):
            state_dict = torch.load(weights_path, map_location=map_location)
            model.load_state_dict(state_dict, strict=strict)

        return model
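
if __name__ == "__main__":
    # Minimal usage sketch, not part of the original file. The small hidden
    # size, the output directory name, and the example repo id below are
    # placeholders chosen for illustration.
    model = RNNSeqRegressorHub(hidden=128, num_layers=1)

    # Forward pass on a dummy padded batch: (batch=2, max_len=6, D_FEATURES)
    x_pad = torch.randn(2, 6, D_FEATURES)
    lengths = torch.tensor([6, 3])
    preds = model(x_pad, lengths)  # -> shape (2, 6)

    # save_pretrained (from PyTorchModelHubMixin) creates the directory and
    # calls _save_pretrained above, writing config.json and pytorch_model.bin.
    model.save_pretrained("rnn_seq_regressor_ckpt")

    # Local round-trip check: rebuild the model and load the saved weights.
    reloaded = RNNSeqRegressorHub(hidden=128, num_layers=1)
    state_dict = torch.load(
        os.path.join("rnn_seq_regressor_ckpt", "pytorch_model.bin"),
        map_location="cpu",
    )
    reloaded.load_state_dict(state_dict)

    # Loading from the Hub instead goes through _from_pretrained, e.g.:
    #   RNNSeqRegressorHub.from_pretrained("your-org/your-repo")  # placeholder repo id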