ash12321 committed on
Commit
cc156cd
·
verified ·
1 Parent(s): 48a4ec7

Upload model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. model.py +90 -84
model.py CHANGED
@@ -1,74 +1,86 @@
1
  """
2
- ResidualConvAutoencoder - Deepfake Detection Model
3
- Architecture: 5-stage encoder-decoder with residual blocks
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
6
  import torch
7
  import torch.nn as nn
8
 
 
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped in an identity skip connection.

    Both convolutions map ``channels`` to ``channels`` with ``padding=1``,
    so the output tensor has the same shape as the input.

    Args:
        channels: Number of input (and output) feature channels.
    """

    def __init__(self, channels):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1)
        # Attribute names double as state_dict key prefixes; keep them stable.
        self.conv1 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(x + bn2(conv2(relu(bn1(conv1(x))))))``."""
        skip = x
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = self.relu(hidden)
        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)
        # In-place add of the untouched input forms the residual connection.
        hidden += skip
        return self.relu(hidden)
25
 
 
26
  class ResidualConvAutoencoder(nn.Module):
27
  """
28
- Residual Convolutional Autoencoder for image reconstruction and deepfake detection.
29
 
30
  Args:
31
- latent_dim (int): Dimension of latent space (default: 512)
 
32
 
33
  Input:
34
- x: Tensor of shape (batch_size, 3, 128, 128), values in [-1, 1]
 
35
 
36
  Output:
37
- reconstructed: Tensor of shape (batch_size, 3, 128, 128), values in [-1, 1]
38
  latent: Tensor of shape (batch_size, latent_dim)
39
  """
40
- def __init__(self, latent_dim=512):
41
  super().__init__()
42
- self.latent_dim = latent_dim
43
 
44
  # Encoder: 128x128 -> 4x4
45
  self.encoder = nn.Sequential(
46
- # Stage 1: 128 -> 64
47
- nn.Conv2d(3, 64, 4, stride=2, padding=1),
48
  nn.BatchNorm2d(64),
49
  nn.ReLU(inplace=True),
50
- ResidualBlock(64),
51
 
52
- # Stage 2: 64 -> 32
53
- nn.Conv2d(64, 128, 4, stride=2, padding=1),
54
  nn.BatchNorm2d(128),
55
  nn.ReLU(inplace=True),
56
- ResidualBlock(128),
57
 
58
- # Stage 3: 32 -> 16
59
- nn.Conv2d(128, 256, 4, stride=2, padding=1),
60
  nn.BatchNorm2d(256),
61
  nn.ReLU(inplace=True),
62
- ResidualBlock(256),
63
 
64
- # Stage 4: 16 -> 8
65
- nn.Conv2d(256, 512, 4, stride=2, padding=1),
66
  nn.BatchNorm2d(512),
67
  nn.ReLU(inplace=True),
68
- ResidualBlock(512),
69
 
70
- # Stage 5: 8 -> 4
71
- nn.Conv2d(512, 512, 4, stride=2, padding=1),
72
  nn.BatchNorm2d(512),
73
  nn.ReLU(inplace=True),
74
  )
@@ -79,44 +91,39 @@ class ResidualConvAutoencoder(nn.Module):
79
 
80
  # Decoder: 4x4 -> 128x128
81
  self.decoder = nn.Sequential(
82
- # Stage 1: 4 -> 8
83
- nn.ConvTranspose2d(512, 512, 4, stride=2, padding=1),
84
  nn.BatchNorm2d(512),
85
  nn.ReLU(inplace=True),
86
- ResidualBlock(512),
87
 
88
- # Stage 2: 8 -> 16
89
- nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
90
  nn.BatchNorm2d(256),
91
  nn.ReLU(inplace=True),
92
- ResidualBlock(256),
93
 
94
- # Stage 3: 16 -> 32
95
- nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
96
  nn.BatchNorm2d(128),
97
  nn.ReLU(inplace=True),
98
- ResidualBlock(128),
99
 
100
- # Stage 4: 32 -> 64
101
- nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
102
  nn.BatchNorm2d(64),
103
  nn.ReLU(inplace=True),
104
- ResidualBlock(64),
105
 
106
- # Stage 5: 64 -> 128
107
- nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1),
108
- nn.Tanh() # Output in [-1, 1]
109
  )
110
 
111
  def forward(self, x):
112
  """
113
- Forward pass through the autoencoder.
114
 
115
  Args:
116
  x: Input tensor of shape (batch_size, 3, 128, 128)
117
 
118
  Returns:
119
- reconstructed: Reconstructed image of shape (batch_size, 3, 128, 128)
120
  latent: Latent representation of shape (batch_size, latent_dim)
121
  """
122
  # Encode
@@ -131,63 +138,62 @@ class ResidualConvAutoencoder(nn.Module):
131
 
132
  return reconstructed, latent
133
 
def encode(self, x):
    """Map an input batch to latent vectors without running the decoder.

    The convolutional encoder output is flattened per sample and then
    projected through ``fc_encoder``.
    """
    features = self.encoder(x)
    batch_size = features.size(0)
    flat = features.view(batch_size, -1)
    return self.fc_encoder(flat)
140
-
def decode(self, latent):
    """Reconstruct an image batch from latent vectors.

    ``fc_decoder`` expands each latent vector; the result is reshaped to a
    (batch, 512, 4, 4) feature map and passed through the decoder.
    """
    expanded = self.fc_decoder(latent)
    feature_map = expanded.view(expanded.size(0), 512, 4, 4)
    return self.decoder(feature_map)
147
-
def reconstruction_error(self, x, reduction='mean'):
    """
    Mean-squared error between ``x`` and its reconstruction.

    Args:
        x: Input tensor
        reduction: 'mean' returns one scalar averaged over everything,
            'none' returns one averaged value per sample

    Returns:
        Reconstruction error (MSE)

    Raises:
        ValueError: If ``reduction`` is neither 'mean' nor 'none'.
    """
    # The forward pass runs before the reduction argument is validated.
    reconstructed, _ = self.forward(x)
    squared_diff = (reconstructed - x).pow(2)

    if reduction == 'none':
        return squared_diff.view(x.size(0), -1).mean(dim=1)
    if reduction == 'mean':
        return squared_diff.mean()
    raise ValueError(f"Unknown reduction: {reduction}")
169
 
def load_model(checkpoint_path, device='cuda'):
    """
    Build a ResidualConvAutoencoder and restore its weights from a checkpoint.

    Args:
        checkpoint_path: Path to .ckpt file
        device: 'cuda' or 'cpu'

    Returns:
        model: Loaded ResidualConvAutoencoder in eval mode
    """
    model = ResidualConvAutoencoder(latent_dim=512)
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # The weights may be nested under one of two wrapper keys; otherwise the
    # loaded object itself is used as the state dict.
    state_dict = checkpoint
    for wrapper_key in ('model_state_dict', 'state_dict'):
        if wrapper_key in checkpoint:
            state_dict = checkpoint[wrapper_key]
            break
    model.load_state_dict(state_dict)

    model = model.to(device)
    model.eval()
    return model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Residual Convolutional Autoencoder Model
3
+
4
+ Usage:
5
+ from model import ResidualConvAutoencoder, load_model
6
+ import torch
7
+
8
+ # Option 1: Create and load manually
9
+ model = ResidualConvAutoencoder(latent_dim=512)
10
+ checkpoint = torch.load('model_universal_best.ckpt')
11
+ model.load_state_dict(checkpoint['model_state_dict'])
12
+ model.eval()
13
+
14
+ # Option 2: Use helper function
15
+ model, checkpoint = load_model('model_universal_best.ckpt', device='cuda')
16
  """
17
 
18
  import torch
19
  import torch.nn as nn
20
 
21
+
class ResidualBlock(nn.Module):
    """Residual block: conv-bn-relu, optional channel dropout, conv-bn, skip add.

    Both convolutions map ``channels`` to ``channels`` with ``padding=1``,
    so the output tensor has the same shape as the input.

    Args:
        channels: Number of input (and output) feature channels.
        dropout: Probability for ``nn.Dropout2d`` between the two
            convolutions; a value that is not positive disables dropout.
    """

    def __init__(self, channels, dropout=0.1):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1)
        # Attribute names double as state_dict key prefixes; keep them stable.
        self.conv1 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        if dropout > 0:
            self.dropout = nn.Dropout2d(dropout)
        else:
            # Pass-through so forward() needs no branching.
            self.dropout = nn.Identity()

    def forward(self, x):
        """Apply the residual transform and return a tensor shaped like ``x``."""
        skip = x
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)
        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)
        # In-place add of the untouched input forms the residual connection.
        hidden += skip
        return self.relu(hidden)
40
 
41
+
42
  class ResidualConvAutoencoder(nn.Module):
43
  """
44
+ Residual Convolutional Autoencoder for image reconstruction
45
 
46
  Args:
47
+ latent_dim (int): Dimension of the latent space. Default: 512
48
+ dropout (float): Dropout rate for regularization. Default: 0.1
49
 
50
  Input:
51
+ x: Tensor of shape (batch_size, 3, 128, 128)
52
+ Values should be normalized to [-1, 1]
53
 
54
  Output:
55
+ reconstructed: Tensor of shape (batch_size, 3, 128, 128)
56
  latent: Tensor of shape (batch_size, latent_dim)
57
  """
58
+ def __init__(self, latent_dim=512, dropout=0.1):
59
  super().__init__()
 
60
 
61
  # Encoder: 128x128 -> 4x4
62
  self.encoder = nn.Sequential(
63
+ nn.Conv2d(3, 64, 4, stride=2, padding=1), # 128 -> 64
 
64
  nn.BatchNorm2d(64),
65
  nn.ReLU(inplace=True),
66
+ ResidualBlock(64, dropout),
67
 
68
+ nn.Conv2d(64, 128, 4, stride=2, padding=1), # 64 -> 32
 
69
  nn.BatchNorm2d(128),
70
  nn.ReLU(inplace=True),
71
+ ResidualBlock(128, dropout),
72
 
73
+ nn.Conv2d(128, 256, 4, stride=2, padding=1), # 32 -> 16
 
74
  nn.BatchNorm2d(256),
75
  nn.ReLU(inplace=True),
76
+ ResidualBlock(256, dropout),
77
 
78
+ nn.Conv2d(256, 512, 4, stride=2, padding=1), # 16 -> 8
 
79
  nn.BatchNorm2d(512),
80
  nn.ReLU(inplace=True),
81
+ ResidualBlock(512, dropout),
82
 
83
+ nn.Conv2d(512, 512, 4, stride=2, padding=1), # 8 -> 4
 
84
  nn.BatchNorm2d(512),
85
  nn.ReLU(inplace=True),
86
  )
 
91
 
92
  # Decoder: 4x4 -> 128x128
93
  self.decoder = nn.Sequential(
94
+ nn.ConvTranspose2d(512, 512, 4, stride=2, padding=1), # 4 -> 8
 
95
  nn.BatchNorm2d(512),
96
  nn.ReLU(inplace=True),
97
+ ResidualBlock(512, dropout),
98
 
99
+ nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1), # 8 -> 16
 
100
  nn.BatchNorm2d(256),
101
  nn.ReLU(inplace=True),
102
+ ResidualBlock(256, dropout),
103
 
104
+ nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1), # 16 -> 32
 
105
  nn.BatchNorm2d(128),
106
  nn.ReLU(inplace=True),
107
+ ResidualBlock(128, dropout),
108
 
109
+ nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1), # 32 -> 64
 
110
  nn.BatchNorm2d(64),
111
  nn.ReLU(inplace=True),
112
+ ResidualBlock(64, dropout),
113
 
114
+ nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1), # 64 -> 128
115
+ nn.Tanh()
 
116
  )
117
 
118
  def forward(self, x):
119
  """
120
+ Forward pass through the autoencoder
121
 
122
  Args:
123
  x: Input tensor of shape (batch_size, 3, 128, 128)
124
 
125
  Returns:
126
+ reconstructed: Reconstructed tensor of shape (batch_size, 3, 128, 128)
127
  latent: Latent representation of shape (batch_size, latent_dim)
128
  """
129
  # Encode
 
138
 
139
  return reconstructed, latent
140
 
def reconstruction_error(self, x):
    """
    Compute per-sample reconstruction error (MSE)

    Args:
        x: Input tensor of shape (batch_size, 3, 128, 128)

    Returns:
        error: Tensor of shape (batch_size,) containing MSE for each sample
    """
    reconstructed, _ = self.forward(x)
    squared_diff = (reconstructed - x) ** 2
    # flatten() copies only when it has to, so this also works when the
    # difference tensor is not contiguous (e.g. channels_last inputs) or the
    # batch is empty -- cases where view(batch, -1) raises.
    return squared_diff.flatten(start_dim=1).mean(dim=1)
154
+
 
 
 
 
 
155
 
def load_model(checkpoint_path, device='cuda', dropout=0.1):
    """
    Load a pretrained model from checkpoint

    The weights are taken from ``checkpoint['model_state_dict']`` when
    present, then ``checkpoint['state_dict']``, and otherwise the loaded
    object itself is treated as the state dict.

    Args:
        checkpoint_path: Path to the checkpoint file
        device: Device to load the model on ('cuda' or 'cpu')
        dropout: Dropout rate used when the checkpoint's config does not
            provide one

    Returns:
        model: Loaded ResidualConvAutoencoder model in eval mode
        checkpoint: The object returned by ``torch.load``
    """
    # NOTE(security): weights_only=False unpickles arbitrary Python objects.
    # Only load checkpoint files from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    # Hyperparameters stored with the checkpoint win over the defaults; a
    # missing or None 'config' entry falls back to an empty dict.
    config = checkpoint.get('config') or {}
    latent_dim = config.get('latent_dim', 512)
    dropout = config.get('dropout', dropout)

    if 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    elif 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        # Bare state dict saved without any wrapper.
        state_dict = checkpoint

    model = ResidualConvAutoencoder(latent_dim=latent_dim, dropout=dropout)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    return model, checkpoint
182
+
183
+
if __name__ == "__main__":
    # Smoke test: build the model and push one random batch through it.
    model = ResidualConvAutoencoder(latent_dim=512, dropout=0.1)
    print(f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters")

    # Forward pass on a batch of two random 3x128x128 inputs.
    x = torch.randn(2, 3, 128, 128)
    reconstructed, latent = model(x)
    print(f"Input shape: {x.shape}")
    print(f"Reconstructed shape: {reconstructed.shape}")
    print(f"Latent shape: {latent.shape}")

    # Reconstruction error for the same batch.
    error = model.reconstruction_error(x)
    print(f"Reconstruction error shape: {error.shape}")
    print(f"Mean error: {error.mean().item():.6f}")