Spaces:

BorisEm
/

HATSAT

Running on CPU Upgrade

App Files Files Community

HATSAT / app_old.py

BorisEm

Fix HAT model architecture to match checkpoint structure

34e77ba 3 months ago

raw

history blame

26.3 kB

	import gradio as gr
	import torch
	import torch.nn as nn
	import numpy as np
	from PIL import Image
	import cv2
	import math
	from einops import rearrange


	def to_2tuple(x):
	    """Return ``x`` as a tuple, duplicating a non-sequence value into a pair.

	    Tuples and lists are converted with ``tuple(x)`` (their length is not
	    checked); any other value ``v`` becomes ``(v, v)``.
	    """
	    return tuple(x) if isinstance(x, (tuple, list)) else (x, x)


	def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
	    """Fill ``tensor`` in place with normal samples limited to ``[a, b]``.

	    The tensor is filled uniformly between the (rescaled) normal CDF values of
	    the two bounds, mapped through the inverse error function, scaled by
	    ``std``, shifted by ``mean`` and finally clamped to ``[a, b]``.

	    Args:
	        tensor: Tensor that is overwritten in place.
	        mean: Mean of the underlying normal distribution.
	        std: Standard deviation of the underlying normal distribution.
	        a: Lower bound of the samples.
	        b: Upper bound of the samples.

	    Returns:
	        The same ``tensor`` object that was passed in.
	    """
	    sqrt_two = math.sqrt(2.)

	    def _standard_normal_cdf(value):
	        return (1. + math.erf(value / sqrt_two)) / 2.

	    cdf_low = _standard_normal_cdf((a - mean) / std)
	    cdf_high = _standard_normal_cdf((b - mean) / std)

	    # All in-place edits happen outside autograd tracking.
	    with torch.no_grad():
	        tensor.uniform_(2 * cdf_low - 1, 2 * cdf_high - 1)
	        tensor.erfinv_()
	        tensor.mul_(std * sqrt_two)
	        tensor.add_(mean)
	        tensor.clamp_(min=a, max=b)
	    return tensor


	def drop_path(x, drop_prob: float = 0., training: bool = False):
	    """Randomly zero whole samples of ``x`` and rescale the survivors.

	    One random keep/drop decision is drawn per entry of the first dimension
	    of ``x`` and broadcast over the remaining dimensions. Kept samples are
	    divided by ``1 - drop_prob``.

	    Args:
	        x: Input tensor; the first dimension indexes the samples.
	        drop_prob: Probability of dropping a sample. ``0`` and ``None``
	            disable dropping.
	        training: Dropping is only applied when this is true.

	    Returns:
	        ``x`` itself when dropping is disabled, otherwise a new tensor of the
	        same shape. When ``drop_prob >= 1`` every sample is dropped and an
	        all-zero tensor is returned.
	    """
	    # ``not drop_prob`` covers 0.0 as well as None, which is DropPath's default
	    # and would otherwise fail in ``1 - drop_prob``.
	    if not drop_prob or not training:
	        return x
	    keep_prob = 1 - drop_prob
	    if keep_prob <= 0.:
	        # Dividing by a zero keep probability would turn the output into NaN.
	        return torch.zeros_like(x)
	    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
	    random_tensor = keep_prob + torch.rand(mask_shape, dtype=x.dtype, device=x.device)
	    random_tensor.floor_()  # 1 with probability keep_prob, otherwise 0
	    return x.div(keep_prob) * random_tensor


	class DropPath(nn.Module):
	    """Module form of :func:`drop_path`.

	    Stores ``drop_prob`` and forwards it, together with the module's
	    ``training`` flag, to ``drop_path`` on every call.
	    """

	    def __init__(self, drop_prob=None):
	        super().__init__()
	        self.drop_prob = drop_prob

	    def forward(self, x):
	        return drop_path(x, drop_prob=self.drop_prob, training=self.training)


	class ChannelAttention(nn.Module):
	    """Scale each channel of a feature map by a gate computed from its global average.

	    Args:
	        num_feat: Number of input (and output) channels.
	        squeeze_factor: The hidden 1x1 convolution uses
	            ``num_feat // squeeze_factor`` channels.
	    """

	    def __init__(self, num_feat, squeeze_factor=16):
	        super().__init__()
	        squeezed = num_feat // squeeze_factor
	        gate_layers = [
	            nn.AdaptiveAvgPool2d(1),
	            nn.Conv2d(num_feat, squeezed, 1, padding=0),
	            nn.ReLU(inplace=True),
	            nn.Conv2d(squeezed, num_feat, 1, padding=0),
	            nn.Sigmoid(),
	        ]
	        self.attention = nn.Sequential(*gate_layers)

	    def forward(self, x):
	        # The gate has spatial size 1x1 and broadcasts over the feature map.
	        return x * self.attention(x)


	class CAB(nn.Module):
	    """Convolutional branch: 3x3 conv, GELU, 3x3 conv, then ``ChannelAttention``.

	    Args:
	        num_feat: Number of input (and output) channels.
	        compress_ratio: The middle convolution width is
	            ``num_feat // compress_ratio``.
	        squeeze_factor: Forwarded to ``ChannelAttention``.
	    """

	    def __init__(self, num_feat, compress_ratio=3, squeeze_factor=30):
	        super().__init__()
	        compressed = num_feat // compress_ratio
	        self.cab = nn.Sequential(
	            nn.Conv2d(num_feat, compressed, 3, 1, 1),
	            nn.GELU(),
	            nn.Conv2d(compressed, num_feat, 3, 1, 1),
	            ChannelAttention(num_feat, squeeze_factor),
	        )

	    def forward(self, x):
	        out = self.cab(x)
	        return out


	class Mlp(nn.Module):
	    """Two linear layers with an activation in between and dropout after each.

	    Args:
	        in_features: Size of the input's last dimension.
	        hidden_features: Width of the hidden layer; falls back to
	            ``in_features`` when falsy.
	        out_features: Size of the output's last dimension; falls back to
	            ``in_features`` when falsy.
	        act_layer: Callable that builds the activation module.
	        drop: Dropout probability shared by both dropout applications.
	    """

	    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
	        super().__init__()
	        output_width = out_features if out_features else in_features
	        hidden_width = hidden_features if hidden_features else in_features
	        self.fc1 = nn.Linear(in_features, hidden_width)
	        self.act = act_layer()
	        self.fc2 = nn.Linear(hidden_width, output_width)
	        self.drop = nn.Dropout(drop)

	    def forward(self, x):
	        hidden = self.drop(self.act(self.fc1(x)))
	        return self.drop(self.fc2(hidden))


	def window_partition(x, window_size):
	    """Cut a ``(B, H, W, C)`` tensor into non-overlapping square windows.

	    Args:
	        x: Tensor of shape ``(B, H, W, C)``.
	        window_size: Side length of each window.

	    Returns:
	        Tensor of shape ``(num_windows * B, window_size, window_size, C)``.
	    """
	    batch, height, width, channels = x.shape
	    rows, cols = height // window_size, width // window_size
	    tiled = x.view(batch, rows, window_size, cols, window_size, channels)
	    # Bring the two window-grid axes next to each other before flattening them.
	    tiled = tiled.permute(0, 1, 3, 2, 4, 5).contiguous()
	    return tiled.view(-1, window_size, window_size, channels)


	def window_reverse(windows, window_size, H, W):
	    """Reassemble square windows into a ``(B, H, W, C)`` tensor.

	    Args:
	        windows: Tensor of shape
	            ``(num_windows * B, window_size, window_size, C)``.
	        window_size: Side length of each window.
	        H: Height of the reassembled tensor.
	        W: Width of the reassembled tensor.

	    Returns:
	        Tensor of shape ``(B, H, W, C)``.
	    """
	    windows_per_image = H * W / window_size / window_size
	    batch = int(windows.shape[0] / windows_per_image)
	    grid = windows.view(batch, H // window_size, W // window_size, window_size, window_size, -1)
	    # Interleave window rows with in-window rows so the final view restores H and W.
	    grid = grid.permute(0, 1, 3, 2, 4, 5).contiguous()
	    return grid.view(batch, H, W, -1)


	class WindowAttention(nn.Module):
	    """Multi-head self-attention over the tokens of a window, with a learned relative position bias.

	    Args:
	        dim: Channel count of the input tokens.
	        window_size: ``(height, width)`` of the window.
	        num_heads: Number of attention heads; each head gets
	            ``dim // num_heads`` channels.
	        qkv_bias: Whether the joint query/key/value projection has a bias.
	        qk_scale: Scale applied to the queries; defaults to
	            ``head_dim ** -0.5`` when falsy.
	        attn_drop: Dropout probability on the attention weights.
	        proj_drop: Dropout probability after the output projection.
	    """

	    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
	        super().__init__()
	        self.dim = dim
	        self.window_size = window_size
	        self.num_heads = num_heads
	        head_dim = dim // num_heads
	        self.scale = qk_scale or head_dim ** -0.5

	        # One bias per head for every possible (row offset, column offset) pair.
	        self.relative_position_bias_table = nn.Parameter(
	            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))

	        # Map every (token i, token j) pair to its row in the bias table.
	        coords_h = torch.arange(self.window_size[0])
	        coords_w = torch.arange(self.window_size[1])
	        # "ij" is the matrix-style indexing the computation below relies on;
	        # passing it explicitly avoids the meshgrid deprecation warning.
	        coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing="ij"))
	        coords_flatten = torch.flatten(coords, 1)
	        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
	        relative_coords = relative_coords.permute(1, 2, 0).contiguous()
	        # Shift both offsets to start at 0 ...
	        relative_coords[:, :, 0] += self.window_size[0] - 1
	        relative_coords[:, :, 1] += self.window_size[1] - 1
	        # ... then scale the row offset by the number of distinct column
	        # offsets so that the sum is a unique flat index.
	        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
	        relative_position_index = relative_coords.sum(-1)
	        self.register_buffer("relative_position_index", relative_position_index)

	        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
	        self.attn_drop = nn.Dropout(attn_drop)
	        self.proj = nn.Linear(dim, dim)
	        self.proj_drop = nn.Dropout(proj_drop)

	        nn.init.trunc_normal_(self.relative_position_bias_table, std=.02)
	        self.softmax = nn.Softmax(dim=-1)

	    def forward(self, x, mask=None):
	        """Apply windowed attention.

	        Args:
	            x: Tensor of shape ``(B_, N, C)`` where ``N`` must equal
	                ``window_size[0] * window_size[1]``.
	            mask: Optional tensor added to the attention logits. Its first
	                dimension ``nW`` must divide ``B_``; it is broadcast over the
	                ``B_ // nW`` groups and over the heads.

	        Returns:
	            Tensor of shape ``(B_, N, C)``.
	        """
	        B_, N, C = x.shape
	        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
	        q, k, v = qkv[0], qkv[1], qkv[2]

	        q = q * self.scale
	        attn = q @ k.transpose(-2, -1)

	        tokens = self.window_size[0] * self.window_size[1]
	        relative_position_bias = self.relative_position_bias_table[
	            self.relative_position_index.view(-1)].view(tokens, tokens, -1)
	        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()
	        attn = attn + relative_position_bias.unsqueeze(0)

	        if mask is not None:
	            nW = mask.shape[0]
	            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
	            attn = attn.view(-1, self.num_heads, N, N)
	        attn = self.softmax(attn)

	        attn = self.attn_drop(attn)

	        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
	        x = self.proj(x)
	        x = self.proj_drop(x)
	        return x


	class HAB(nn.Module):
	    """Block combining (optionally shifted) window attention, an MLP and a CAB convolution branch.

	    ``forward`` expects tokens of shape ``(B, H * W, C)`` with
	    ``(H, W) == input_resolution`` and returns a tensor of the same shape.

	    Args:
	        dim: Channel count of the tokens.
	        input_resolution: ``(H, W)`` of the token grid.
	        num_heads: Number of attention heads.
	        window_size: Side length of the attention windows. Reduced to the
	            smaller input side when that side does not exceed it.
	        shift_size: Cyclic shift applied before windowing; forced to 0 when
	            the window is reduced.
	        mlp_ratio: Hidden width of the MLP as a multiple of ``dim``.
	        qkv_bias, qk_scale, attn_drop: Forwarded to ``WindowAttention``.
	        drop: Dropout probability for the attention projection and the MLP.
	        drop_path: ``DropPath`` probability on both residual branches.
	        act_layer: Activation factory for the MLP.
	        norm_layer: Normalisation factory, called with ``dim``.
	        compress_ratio, squeeze_factor: Forwarded to ``CAB``.
	    """

	    def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
	                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
	                 act_layer=nn.GELU, norm_layer=nn.LayerNorm, compress_ratio=3, squeeze_factor=30):
	        super().__init__()
	        self.dim = dim
	        self.input_resolution = input_resolution
	        self.num_heads = num_heads
	        self.window_size = window_size
	        self.shift_size = shift_size
	        self.mlp_ratio = mlp_ratio

	        smallest_side = min(self.input_resolution)
	        if smallest_side <= self.window_size:
	            # Clamp the window to the smallest input side and disable shifting.
	            self.shift_size = 0
	            self.window_size = smallest_side
	        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

	        self.norm1 = norm_layer(dim)
	        self.attn = WindowAttention(
	            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads,
	            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)

	        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
	        self.norm2 = norm_layer(dim)
	        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
	                       act_layer=act_layer, drop=drop)

	        self.conv_scale = nn.Parameter(torch.ones(1))
	        self.conv_block = CAB(dim, compress_ratio, squeeze_factor)

	        shift_mask = self._build_shift_mask() if self.shift_size > 0 else None
	        self.register_buffer("attn_mask", shift_mask)

	    def _build_shift_mask(self):
	        """Build the additive attention mask used when windows are shifted."""
	        height, width = self.input_resolution
	        region_ids = torch.zeros((1, height, width, 1))
	        bands = (slice(0, -self.window_size),
	                 slice(-self.window_size, -self.shift_size),
	                 slice(-self.shift_size, None))
	        # Give each of the 3 x 3 regions of the grid its own id.
	        band_pairs = [(rows, cols) for rows in bands for cols in bands]
	        for region, (rows, cols) in enumerate(band_pairs):
	            region_ids[:, rows, cols, :] = region

	        id_windows = window_partition(region_ids, self.window_size)
	        id_windows = id_windows.view(-1, self.window_size * self.window_size)
	        # Token pairs from different regions get -100, pairs from the same region get 0.
	        id_diff = id_windows.unsqueeze(1) - id_windows.unsqueeze(2)
	        return id_diff.masked_fill(id_diff != 0, float(-100.0)).masked_fill(id_diff == 0, float(0.0))

	    def forward(self, x):
	        H, W = self.input_resolution
	        B, L, C = x.shape
	        assert L == H * W, "input feature has wrong size"
	        shifted = self.shift_size > 0

	        # Attention branch.
	        grid = self.norm1(x).view(B, H, W, C)
	        if shifted:
	            grid = torch.roll(grid, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))

	        windows = window_partition(grid, self.window_size)
	        windows = windows.view(-1, self.window_size * self.window_size, C)
	        windows = self.attn(windows, mask=self.attn_mask)
	        windows = windows.view(-1, self.window_size, self.window_size, C)

	        grid = window_reverse(windows, self.window_size, H, W)
	        if shifted:
	            # Undo the cyclic shift.
	            grid = torch.roll(grid, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
	        x = x + self.drop_path(grid.view(B, H * W, C))

	        # MLP branch.
	        x = x + self.drop_path(self.mlp(self.norm2(x)))

	        # Convolution branch, computed in (B, C, H, W) layout.
	        conv_out = self.conv_block(x.view(B, H, W, C).permute(0, 3, 1, 2))
	        conv_out = conv_out.permute(0, 2, 3, 1).view(B, H * W, C)

	        return x + self.conv_scale * conv_out


	class OCAB(nn.Module):
	    """Block with shifted window attention on a padded grid, an MLP and a CAB convolution branch.

	    ``forward`` expects tokens of shape ``(B, H * W, C)`` with
	    ``(H, W) == input_resolution`` and returns a tensor of the same shape.
	    The grid is padded at the bottom/right to a multiple of the window size
	    before windowing, and attention is called without a mask even when the
	    grid is shifted.

	    Args:
	        dim: Channel count of the tokens.
	        input_resolution: ``(H, W)`` of the token grid.
	        window_size: Side length of the attention windows. Reduced to the
	            smaller input side when that side does not exceed it.
	        overlap_ratio: The shift is ``round(overlap_ratio * window_size)``;
	            forced to 0 when the window is reduced.
	        num_heads: Number of attention heads.
	        mlp_ratio: Hidden width of the MLP as a multiple of ``dim``.
	        qkv_bias, qk_scale, attn_drop: Forwarded to ``WindowAttention``.
	        drop: Dropout probability for the attention projection and the MLP.
	        drop_path: ``DropPath`` probability on both residual branches.
	        act_layer: Activation factory for the MLP.
	        norm_layer: Normalisation factory, called with ``dim``.
	        compress_ratio, squeeze_factor: Forwarded to ``CAB``.
	    """

	    def __init__(self, dim, input_resolution, window_size, overlap_ratio, num_heads,
	                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
	                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, compress_ratio=3,
	                 squeeze_factor=30):
	        super().__init__()
	        self.dim = dim
	        self.input_resolution = input_resolution
	        self.window_size = window_size
	        self.num_heads = num_heads
	        self.shift_size = round(overlap_ratio * window_size)
	        self.mlp_ratio = mlp_ratio

	        smallest_side = min(self.input_resolution)
	        if smallest_side <= self.window_size:
	            # Clamp the window to the smallest input side and disable shifting.
	            self.shift_size = 0
	            self.window_size = smallest_side

	        assert 0 <= self.shift_size, "shift_size >= 0 is required"

	        self.norm1 = norm_layer(dim)
	        self.attn = WindowAttention(
	            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads,
	            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)

	        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
	        self.norm2 = norm_layer(dim)
	        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
	                       act_layer=act_layer, drop=drop)

	        self.conv_scale = nn.Parameter(torch.ones(1))
	        self.conv_block = CAB(dim, compress_ratio, squeeze_factor)

	    def forward(self, x):
	        H, W = self.input_resolution
	        B, L, C = x.shape
	        assert L == H * W, "input feature has wrong size"
	        win = self.window_size
	        shifted = self.shift_size > 0

	        # Attention branch.
	        grid = self.norm1(x).view(B, H, W, C)

	        # Pad bottom/right so both sides are multiples of the window size.
	        pad_r = (win - W % win) % win
	        pad_b = (win - H % win) % win
	        grid = torch.nn.functional.pad(grid, (0, 0, 0, pad_r, 0, pad_b))
	        _, Hp, Wp, _ = grid.shape

	        if shifted:
	            grid = torch.roll(grid, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))

	        windows = window_partition(grid, win).view(-1, win * win, C)
	        windows = self.attn(windows, mask=None)
	        windows = windows.view(-1, win, win, C)

	        grid = window_reverse(windows, win, Hp, Wp)
	        if shifted:
	            # Undo the cyclic shift.
	            grid = torch.roll(grid, shifts=(self.shift_size, self.shift_size), dims=(1, 2))

	        if pad_r > 0 or pad_b > 0:
	            # Drop the padded rows/columns again.
	            grid = grid[:, :H, :W, :].contiguous()

	        x = x + self.drop_path(grid.view(B, H * W, C))

	        # MLP branch.
	        x = x + self.drop_path(self.mlp(self.norm2(x)))

	        # Convolution branch, computed in (B, C, H, W) layout.
	        conv_out = self.conv_block(x.view(B, H, W, C).permute(0, 3, 1, 2))
	        conv_out = conv_out.permute(0, 2, 3, 1).view(B, H * W, C)

	        return x + self.conv_scale * conv_out


	class PatchEmbed(nn.Module):
	    """Project an image-like tensor into a sequence of patch tokens.

	    A convolution with kernel size and stride equal to ``patch_size`` maps
	    ``(B, in_chans, H, W)`` to one ``embed_dim`` vector per patch; the
	    patches are then flattened to ``(B, num_patches, embed_dim)``.

	    Args:
	        img_size: Expected input height and width (square).
	        patch_size: Patch side length (square).
	        in_chans: Number of input channels.
	        embed_dim: Number of channels per output token.
	        norm_layer: Optional normalisation factory, called with
	            ``embed_dim``; no normalisation when ``None``.
	    """

	    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
	        super().__init__()
	        img_size = (img_size, img_size)
	        patch_size = (patch_size, patch_size)
	        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
	        self.img_size = img_size
	        self.patch_size = patch_size
	        self.patches_resolution = patches_resolution
	        self.num_patches = patches_resolution[0] * patches_resolution[1]

	        self.in_chans = in_chans
	        self.embed_dim = embed_dim

	        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
	        self.norm = norm_layer(embed_dim) if norm_layer is not None else None

	    def forward(self, x):
	        """Embed ``x`` of shape ``(B, C, H, W)``.

	        Raises:
	            AssertionError: If ``(H, W)`` differs from ``img_size``.
	        """
	        B, C, H, W = x.shape
	        # The "*" separators keep the two numbers readable, e.g. "(64*48)".
	        assert H == self.img_size[0] and W == self.img_size[1], \
	            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
	        x = self.proj(x).flatten(2).transpose(1, 2)
	        if self.norm is not None:
	            x = self.norm(x)
	        return x


	class PatchUnEmbed(nn.Module):
	    """Reshape a token sequence ``(B, H * W, C)`` back into a ``(B, embed_dim, H, W)`` map.

	    The module has no parameters. ``norm_layer`` is accepted but not used;
	    the remaining constructor arguments are only stored as attributes.
	    """

	    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
	        super().__init__()
	        self.img_size = (img_size, img_size)
	        self.patch_size = (patch_size, patch_size)
	        self.patches_resolution = [
	            self.img_size[0] // self.patch_size[0],
	            self.img_size[1] // self.patch_size[1],
	        ]
	        self.num_patches = self.patches_resolution[0] * self.patches_resolution[1]

	        self.in_chans = in_chans
	        self.embed_dim = embed_dim

	    def forward(self, x, x_size):
	        height, width = x_size
	        batch, _num_tokens, _channels = x.shape
	        channels_first = x.transpose(1, 2)
	        return channels_first.view(batch, self.embed_dim, height, width)


	class RHAG(nn.Module):
	    """Residual group: ``depth // 2`` HAB blocks, then ``depth // 2`` OCAB blocks, then a 3x3 convolution.

	    ``forward(x, x_size)`` returns ``x + blocks(x) + conv(blocks(x)) * conv_scale``,
	    optionally followed by ``downsample``.

	    Args:
	        dim: Channel count of the tokens.
	        input_resolution: ``(H, W)`` passed to every block.
	        depth: Total block count; half of it (rounded down) is used for
	            each of the two block lists.
	        num_heads, window_size, compress_ratio, squeeze_factor, mlp_ratio,
	        qkv_bias, qk_scale, drop, attn_drop, norm_layer: Forwarded to the
	            blocks.
	        conv_scale: Factor applied to the convolution output.
	        overlap_ratio: Forwarded to the OCAB blocks.
	        drop_path: A single rate, or a list indexed ``0 .. depth // 2 - 1``
	            for the HAB blocks and from ``depth // 2`` onwards for the OCAB
	            blocks.
	        downsample: Optional factory called as
	            ``downsample(input_resolution, dim=dim, norm_layer=norm_layer)``.
	        use_checkpoint: Run each block through
	            ``torch.utils.checkpoint.checkpoint``.
	    """

	    def __init__(self, dim, input_resolution, depth, num_heads, window_size, compress_ratio,
	                 squeeze_factor, conv_scale, overlap_ratio, mlp_ratio=4., qkv_bias=True, qk_scale=None,
	                 drop=0., attn_drop=0., drop_path=0., norm_layer=nn.LayerNorm, downsample=None,
	                 use_checkpoint=False):
	        super().__init__()
	        self.dim = dim
	        self.input_resolution = input_resolution
	        self.depth = depth
	        self.use_checkpoint = use_checkpoint

	        half_depth = depth // 2
	        per_block_rates = isinstance(drop_path, list)

	        def rate_at(index):
	            return drop_path[index] if per_block_rates else drop_path

	        # Keyword arguments shared by both block types.
	        shared = dict(
	            dim=dim, input_resolution=input_resolution, num_heads=num_heads,
	            window_size=window_size, mlp_ratio=mlp_ratio,
	            qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop, attn_drop=attn_drop,
	            norm_layer=norm_layer, compress_ratio=compress_ratio,
	            squeeze_factor=squeeze_factor)

	        # Even-indexed HAB blocks are unshifted, odd-indexed ones shift by half a window.
	        self.blocks_1 = nn.ModuleList([
	            HAB(shift_size=window_size // 2 if i % 2 else 0, drop_path=rate_at(i), **shared)
	            for i in range(half_depth)])

	        self.blocks_2 = nn.ModuleList([
	            OCAB(overlap_ratio=overlap_ratio, drop_path=rate_at(i + half_depth), **shared)
	            for i in range(half_depth)])

	        self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
	        self.conv_scale = conv_scale

	        self.downsample = None
	        if downsample is not None:
	            self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)

	    def forward(self, x, x_size):
	        H, W = x_size
	        res = x
	        for blk in list(self.blocks_1) + list(self.blocks_2):
	            if self.use_checkpoint:
	                x = torch.utils.checkpoint.checkpoint(blk, x)
	            else:
	                x = blk(x)

	        # Run the convolution in (B, C, H, W) layout, then go back to tokens.
	        feature_map = x.transpose(1, 2).view(-1, self.dim, H, W)
	        conv_x = self.conv(feature_map).view(-1, self.dim, H * W).transpose(1, 2)
	        x = res + x + conv_x * self.conv_scale

	        if self.downsample is not None:
	            x = self.downsample(x)
	        return x


	class Upsample(nn.Sequential):
	    """Pixel-shuffle upsampler for scales ``2^n`` and ``3``.

	    A power-of-two scale is built from ``log2(scale)`` stages of a 3x3
	    convolution (``num_feat -> 4 * num_feat``) followed by
	    ``PixelShuffle(2)``; scale 3 uses a single ``9 * num_feat`` convolution
	    followed by ``PixelShuffle(3)``. Scale 1 yields an empty sequence.

	    Args:
	        scale: Integer upscaling factor.
	        num_feat: Channel count of the input and output feature maps.

	    Raises:
	        ValueError: If ``scale`` is not a positive power of two and not 3.
	    """

	    def __init__(self, scale, num_feat):
	        stages = []
	        # ``scale > 0`` is required because 0 also satisfies the bit trick.
	        if scale > 0 and (scale & (scale - 1)) == 0:
	            # bit_length gives the exact exponent without float log rounding.
	            for _ in range(int(scale).bit_length() - 1):
	                stages.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
	                stages.append(nn.PixelShuffle(2))
	        elif scale == 3:
	            stages.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
	            stages.append(nn.PixelShuffle(3))
	        else:
	            raise ValueError(f'scale {scale} is not supported. Supported scales: 2^n and 3.')
	        super().__init__(*stages)


	class HAT(nn.Module):
	    """Super-resolution network: shallow conv, a stack of RHAG groups, then a pixel-shuffle head.

	    Args:
	        img_size: Input height/width passed to ``PatchEmbed``/``PatchUnEmbed``.
	        patch_size: Patch size passed to ``PatchEmbed``/``PatchUnEmbed``.
	        in_chans: Number of image channels (input and output).
	        embed_dim: Channel count of the token features.
	        depths: Block count of each RHAG group.
	        num_heads: Attention head count of each RHAG group.
	        window_size, compress_ratio, squeeze_factor, conv_scale,
	        overlap_ratio, mlp_ratio, qkv_bias, qk_scale: Forwarded to every RHAG.
	        drop_rate: Dropout probability.
	        attn_drop_rate: Attention dropout probability.
	        drop_path_rate: Largest ``DropPath`` rate; rates increase linearly
	            from 0 across all blocks.
	        norm_layer: Normalisation factory.
	        ape: Add a learned absolute position embedding.
	        patch_norm: Normalise inside ``PatchEmbed``.
	        use_checkpoint: Forwarded to every RHAG.
	        upscale: Upscaling factor of the reconstruction head.
	        img_range: Factor applied to the mean-subtracted input.
	        upsampler: Only ``'pixelshuffle'`` builds a reconstruction head.
	        resi_connection: ``'1conv'`` or ``'3conv'``; selects the convolution
	            applied after the RHAG stack.

	    Raises:
	        ValueError: If ``resi_connection`` is neither ``'1conv'`` nor ``'3conv'``.
	    """

	    # Tuple defaults avoid sharing one mutable list between all instances.
	    def __init__(self, img_size=64, patch_size=1, in_chans=3, embed_dim=180, depths=(6, 6, 6, 6, 6, 6),
	                 num_heads=(6, 6, 6, 6, 6, 6), window_size=16, compress_ratio=3, squeeze_factor=30,
	                 conv_scale=0.01, overlap_ratio=0.5, mlp_ratio=4., qkv_bias=True, qk_scale=None,
	                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=nn.LayerNorm,
	                 ape=False, patch_norm=True, use_checkpoint=False, upscale=2, img_range=1.,
	                 upsampler='', resi_connection='1conv', **kwargs):
	        super(HAT, self).__init__()

	        self.window_size = window_size
	        self.shift_size = window_size // 2
	        self.overlap_ratio = overlap_ratio
	        num_in_ch = in_chans
	        num_out_ch = in_chans
	        num_feat = 64
	        self.img_range = img_range
	        if in_chans == 3:
	            rgb_mean = (0.4488, 0.4371, 0.4040)
	            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
	        else:
	            self.mean = torch.zeros(1, 1, 1, 1)
	        self.upscale = upscale
	        self.upsampler = upsampler

	        # Shallow feature extraction.
	        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)

	        # Deep feature extraction.
	        self.num_layers = len(depths)
	        self.embed_dim = embed_dim
	        self.ape = ape
	        self.patch_norm = patch_norm
	        self.num_features = embed_dim
	        self.mlp_ratio = mlp_ratio

	        self.patch_embed = PatchEmbed(
	            img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
	            norm_layer=norm_layer if self.patch_norm else None)
	        num_patches = self.patch_embed.num_patches
	        patches_resolution = self.patch_embed.patches_resolution
	        self.patches_resolution = patches_resolution

	        self.patch_unembed = PatchUnEmbed(
	            img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
	            norm_layer=norm_layer if self.patch_norm else None)

	        if self.ape:
	            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
	            nn.init.trunc_normal_(self.absolute_pos_embed, std=.02)

	        self.pos_drop = nn.Dropout(p=drop_rate)

	        # One drop-path rate per block, increasing linearly over the whole network.
	        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]

	        self.layers = nn.ModuleList()
	        for i_layer in range(self.num_layers):
	            layer = RHAG(dim=embed_dim,
	                         input_resolution=(patches_resolution[0],
	                                           patches_resolution[1]),
	                         depth=depths[i_layer],
	                         num_heads=num_heads[i_layer],
	                         window_size=window_size,
	                         compress_ratio=compress_ratio,
	                         squeeze_factor=squeeze_factor,
	                         conv_scale=conv_scale,
	                         overlap_ratio=overlap_ratio,
	                         mlp_ratio=self.mlp_ratio,
	                         qkv_bias=qkv_bias, qk_scale=qk_scale,
	                         drop=drop_rate, attn_drop=attn_drop_rate,
	                         drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
	                         norm_layer=norm_layer,
	                         downsample=None,
	                         use_checkpoint=use_checkpoint)
	            self.layers.append(layer)
	        self.norm = norm_layer(self.num_features)

	        if resi_connection == '1conv':
	            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
	        elif resi_connection == '3conv':
	            self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
	                                                 nn.LeakyReLU(negative_slope=0.2, inplace=True),
	                                                 nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
	                                                 nn.LeakyReLU(negative_slope=0.2, inplace=True),
	                                                 nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1))
	        else:
	            # forward() always uses conv_after_body, so fail here with a clear
	            # message instead of an AttributeError on the first forward pass.
	            raise ValueError(
	                f"resi_connection {resi_connection!r} is not supported. Supported values: '1conv', '3conv'.")

	        # Reconstruction head.
	        if upsampler == 'pixelshuffle':
	            self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
	                                                      nn.LeakyReLU(inplace=True))
	            self.upsample = Upsample(upscale, num_feat)
	            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)

	        self.apply(self._init_weights)

	    def _init_weights(self, m):
	        """Initialise Linear weights (truncated normal) and LayerNorm/bias constants."""
	        if isinstance(m, nn.Linear):
	            nn.init.trunc_normal_(m.weight, std=.02)
	            if m.bias is not None:
	                nn.init.constant_(m.bias, 0)
	        elif isinstance(m, nn.LayerNorm):
	            nn.init.constant_(m.bias, 0)
	            nn.init.constant_(m.weight, 1.0)

	    @torch.jit.ignore
	    def no_weight_decay(self):
	        return {'absolute_pos_embed'}

	    @torch.jit.ignore
	    def no_weight_decay_keywords(self):
	        return {'relative_position_bias_table'}

	    def forward_features(self, x):
	        """Run the RHAG stack on a ``(B, embed_dim, H, W)`` map and return the same layout."""
	        x_size = (x.shape[2], x.shape[3])
	        x = self.patch_embed(x)
	        if self.ape:
	            x = x + self.absolute_pos_embed
	        x = self.pos_drop(x)

	        for layer in self.layers:
	            x = layer(x, x_size)

	        x = self.norm(x)
	        x = self.patch_unembed(x, x_size)

	        return x

	    def forward(self, x):
	        """Upscale an image batch ``x`` of shape ``(B, in_chans, H, W)``."""
	        # ``mean`` is a plain tensor attribute, so match x's dtype/device here.
	        self.mean = self.mean.type_as(x)
	        x = (x - self.mean) * self.img_range

	        x_first = self.conv_first(x)
	        res = self.conv_after_body(self.forward_features(x_first)) + x_first
	        # NOTE: for any other ``upsampler`` value ``res`` is discarded and the
	        # input is returned after the normalisation round trip below.
	        if self.upsampler == 'pixelshuffle':
	            x = self.conv_before_upsample(res)
	            x = self.conv_last(self.upsample(x))

	        x = x / self.img_range + self.mean

	        return x


	# Pick the GPU when one is available, otherwise run on the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	# Build the 4x HAT network with the hyper-parameters used by this app.
	model = HAT(
	    img_size=128,
	    in_chans=3,
	    embed_dim=180,
	    depths=[6, 6, 6, 6, 6, 6],
	    num_heads=[6, 6, 6, 6, 6, 6],
	    window_size=16,
	    compress_ratio=3,
	    squeeze_factor=30,
	    conv_scale=0.01,
	    overlap_ratio=0.5,
	    mlp_ratio=2,
	    img_range=1.,
	    upscale=4,
	    upsampler='pixelshuffle',
	    resi_connection='1conv',
	)

	# Load the fine-tuned weights.
	# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
	checkpoint = torch.load('net_g_20000.pth', map_location=device)

	# Prefer the 'params_ema' entry, then 'params'; otherwise treat the
	# checkpoint itself as the state dict.
	model.load_state_dict(
	    checkpoint['params_ema'] if 'params_ema' in checkpoint
	    else checkpoint['params'] if 'params' in checkpoint
	    else checkpoint
	)

	model.to(device).eval()


	def upscale_image(image):
	    """Upscale a PIL image with the module-level ``model``.

	    The image is converted to RGB, scaled to ``[0, 1]``, padded at the
	    bottom/right to a multiple of the model's window size, run through the
	    model without gradients, and cropped back to the upscaled size of the
	    original image.

	    Args:
	        image: PIL image, or ``None`` when no image was supplied.

	    Returns:
	        The upscaled PIL image, or ``None`` when ``image`` is ``None``.
	    """
	    if image is None:
	        return None

	    scale = model.upscale
	    window = model.window_size

	    # Convert PIL image to tensor. Forcing RGB guarantees the three-channel
	    # (H, W, 3) layout the permute below relies on.
	    img_np = np.array(image.convert("RGB")).astype(np.float32) / 255.0
	    img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(device)

	    # Ensure the image dimensions are multiples of window_size
	    h, w = img_tensor.shape[2], img_tensor.shape[3]
	    pad_h = (window - h % window) % window
	    pad_w = (window - w % window) % window

	    if pad_h > 0 or pad_w > 0:
	        # Reflect padding needs the pad to be smaller than the padded side;
	        # fall back to edge replication for very small images.
	        pad_mode = 'reflect' if pad_h < h and pad_w < w else 'replicate'
	        img_tensor = torch.nn.functional.pad(img_tensor, (0, pad_w, 0, pad_h), mode=pad_mode)

	    with torch.no_grad():
	        output = model(img_tensor)

	    # Remove the upscaled padding (a no-op when nothing was padded).
	    output = output[:, :, :h * scale, :w * scale]

	    # Convert back to PIL image
	    output_np = output.squeeze(0).permute(1, 2, 0).cpu().numpy()
	    output_np = np.clip(output_np * 255.0, 0, 255).astype(np.uint8)

	    return Image.fromarray(output_np)


	# Gradio UI: one image in, one upscaled image out.
	iface = gr.Interface(
	    title="HAT Super-Resolution for Satellite Images",
	    description=(
	        "Upload a satellite image to enhance its resolution by 4x using a "
	        "fine-tuned HAT model. This model has been specifically trained on "
	        "satellite imagery to provide high-quality super-resolution results."
	    ),
	    fn=upscale_image,
	    inputs=gr.Image(type="pil", label="Input Satellite Image"),
	    outputs=gr.Image(type="pil", label="Super-Resolution Output (4x)"),
	    examples=None,
	    cache_examples=False,
	)

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    iface.launch()