Spaces:

royeis
/

reasoning_loading_bar

Sleeping

App Files Files Community

reasoning_loading_bar / rnn_model.py

royeis

Add app.py

1229c85 7 months ago

raw

history blame contribute delete

4.75 kB

	import os
	import json
	import torch
	import torch.nn as nn
	from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
	from huggingface_hub import PyTorchModelHubMixin
	from huggingface_hub import snapshot_download

	# Model configuration constants
	# D_FEATURES is the width of each per-timestep input feature vector; the
	# GRU models in this file pass it to nn.GRU as input_size.
	D_FEATURES = 5120 # Hidden dimension size for DeepSeek-R1-Distill-Qwen-32B

	class RNNSeqRegressor(nn.Module):
	    """GRU sequence regressor that emits one scalar per timestep.

	    Args:
	        hidden: Hidden state size of the GRU.
	        num_layers: Number of stacked GRU layers.
	        d_features: Size of each input feature vector. ``None`` (the
	            default) falls back to the module-level ``D_FEATURES``.
	    """

	    def __init__(self, hidden=128, num_layers=1, d_features=None):
	        super().__init__()
	        # Resolve the default lazily so the module-level constant stays the
	        # single source of truth while callers can still override it.
	        input_size = D_FEATURES if d_features is None else d_features
	        self.rnn = nn.GRU(
	            input_size=input_size,
	            hidden_size=hidden,
	            num_layers=num_layers,
	            batch_first=True,
	        )
	        self.head = nn.Linear(hidden, 1)  # scalar per step

	    def forward(self, x_pad, lengths):
	        """Run the GRU over a padded batch and regress a scalar per step.

	        Args:
	            x_pad: Padded inputs of shape (B, L, d_features), batch first.
	            lengths: True length of every sequence in the batch, as a
	                tensor (on any device) or a plain sequence of ints.

	        Returns:
	            Tensor of shape (B, max(lengths)). Steps past a sequence's own
	            length come from zero padding of the GRU output, so they carry
	            no information and should be masked by the caller.
	        """
	        # pack_padded_sequence wants the lengths as a 1D int64 CPU tensor;
	        # as_tensor also accepts lists, which a bare ``.cpu()`` call does not.
	        lengths = torch.as_tensor(lengths, dtype=torch.int64, device="cpu")
	        packed = pack_padded_sequence(
	            x_pad, lengths, batch_first=True, enforce_sorted=False
	        )
	        packed_out, _ = self.rnn(packed)
	        h_seq, _ = pad_packed_sequence(packed_out, batch_first=True)
	        # h_seq: (B, max(lengths), hidden) -> (B, max(lengths))
	        return self.head(h_seq).squeeze(-1)




	class RNNSeqRegressorHub(nn.Module, PyTorchModelHubMixin):
	    """
	    GRU-based sequence regressor with Hugging Face Hub integration.

	    This model processes sequences of features and outputs a scalar prediction
	    for each timestep, designed for thinking progress regression tasks.

	    Args:
	        hidden: Hidden state size of the GRU.
	        num_layers: Number of stacked GRU layers.
	        d_features: Size of each input feature vector. ``None`` (the
	            default) falls back to the module-level ``D_FEATURES``.
	        **kwargs: Ignored. Lets descriptive config.json entries such as
	            "model_type" and "architecture" pass through the constructor.
	    """

	    def __init__(self, hidden=5120, num_layers=1, d_features=None, **kwargs):
	        super().__init__()
	        self.hidden = hidden
	        self.num_layers = num_layers
	        # Honor the value stored in config.json instead of silently
	        # rebuilding the GRU with the module constant.
	        self.d_features = D_FEATURES if d_features is None else d_features

	        self.rnn = nn.GRU(
	            input_size=self.d_features,
	            hidden_size=hidden,
	            num_layers=num_layers,
	            batch_first=True,
	        )
	        self.head = nn.Linear(hidden, 1)  # scalar per step

	    def forward(self, x_pad, lengths):
	        """
	        Forward pass through the model.

	        Args:
	            x_pad: Padded input sequences of shape
	                (batch_size, max_length, d_features)
	            lengths: Actual lengths of sequences in the batch, as a tensor
	                (on any device) or a plain sequence of ints

	        Returns:
	            predictions: Scalar predictions for each timestep, of shape
	                (batch_size, max(lengths)). Steps past a sequence's own
	                length come from zero padding of the GRU output and should
	                be masked by the caller.
	        """
	        # pack_padded_sequence wants the lengths as a 1D int64 CPU tensor;
	        # as_tensor also accepts lists, which a bare ``.cpu()`` call does not.
	        lengths = torch.as_tensor(lengths, dtype=torch.int64, device="cpu")
	        packed = pack_padded_sequence(
	            x_pad, lengths, batch_first=True, enforce_sorted=False
	        )
	        packed_out, _ = self.rnn(packed)
	        h_seq, _ = pad_packed_sequence(packed_out, batch_first=True)
	        # h_seq: (B, max(lengths), hidden) -> (B, max(lengths))
	        return self.head(h_seq).squeeze(-1)

	    def _save_pretrained(self, save_directory):
	        """
	        Save model configuration and weights.

	        Writes ``config.json`` (constructor arguments plus descriptive
	        metadata) and ``pytorch_model.bin`` (the state dict) into
	        ``save_directory``.
	        """
	        config = {
	            "hidden": self.hidden,
	            "num_layers": self.num_layers,
	            "d_features": self.d_features,
	            "model_type": "rnn_seq_regressor",
	            "architecture": "GRU",
	        }

	        config_path = os.path.join(save_directory, "config.json")
	        with open(config_path, "w", encoding="utf-8") as f:
	            json.dump(config, f, indent=2)

	        model_path = os.path.join(save_directory, "pytorch_model.bin")
	        torch.save(self.state_dict(), model_path)

	    @classmethod
	    def _from_pretrained(
	        cls,
	        model_id,
	        revision=None,
	        cache_dir=None,
	        force_download=False,
	        proxies=None,
	        resume_download=False,
	        local_files_only=False,
	        token=None,
	        map_location="cpu",
	        strict=True,
	        **model_kwargs,
	    ):
	        """
	        Load model from a local directory or a repo on Hugging Face Hub.

	        Args:
	            model_id: Path of a local directory holding the saved files, or
	                a Hub repo id to download a snapshot of.
	            revision, cache_dir, force_download, proxies, resume_download,
	            local_files_only, token: Forwarded to ``snapshot_download``
	                when ``model_id`` is not a local directory.
	            map_location: Device mapping passed to ``torch.load``.
	            strict: Passed to ``load_state_dict``.
	            **model_kwargs: Constructor arguments. Entries found in
	                ``config.json`` take precedence over these.

	        Returns:
	            The constructed model. If no ``pytorch_model.bin`` is found, a
	            warning is issued and the weights stay randomly initialized.
	        """
	        import warnings

	        if os.path.isdir(model_id):
	            # A directory written by save_pretrained is not a repo id, so
	            # read it directly instead of asking the Hub for it.
	            snapshot_path = model_id
	        else:
	            snapshot_path = snapshot_download(
	                repo_id=model_id,
	                revision=revision,
	                cache_dir=cache_dir,
	                force_download=force_download,
	                proxies=proxies,
	                resume_download=resume_download,
	                local_files_only=local_files_only,
	                token=token,
	            )

	        # Load config; stored values override caller-supplied kwargs so the
	        # architecture matches the saved weights.
	        config_path = os.path.join(snapshot_path, "config.json")
	        if os.path.exists(config_path):
	            with open(config_path, "r", encoding="utf-8") as f:
	                model_kwargs.update(json.load(f))

	        # Create model instance
	        model = cls(**model_kwargs)

	        # Load weights
	        weights_path = os.path.join(snapshot_path, "pytorch_model.bin")
	        if os.path.exists(weights_path):
	            # SECURITY: the checkpoint may come from an untrusted repo.
	            # weights_only=True restricts unpickling to tensors and plain
	            # containers, which is all a state dict needs.
	            state_dict = torch.load(
	                weights_path, map_location=map_location, weights_only=True
	            )
	            model.load_state_dict(state_dict, strict=strict)
	        else:
	            warnings.warn(
	                f"No pytorch_model.bin found in {snapshot_path}; "
	                "returning a model with randomly initialized weights."
	            )

	        return model