I have a model:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = nn.Conv2d(128, 128, (3, 3))
        self.conv2 = nn.Conv2d(128, 256, (3, 3))
        self.conv3 = nn.Conv2d(256, 256, (3, 3))

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x

model = MyModel()
I want to train the model in such a way that in every training step DATA_X1 trains all of the ['conv1', 'conv2', 'conv3'] layers and DATA_X2 trains only the ['conv3'] layer.
I tried making two optimizers:
# Full parameters train
all_params = model.parameters()
all_optimizer = optim.Adam(all_params, lr=0.01)

# Partial parameters train
partial_params = model.parameters()
for p, (name, param) in zip(list(partial_params), model.named_parameters()):
    if name in ['conv3']:
        p.requires_grad = True
    else:
        p.requires_grad = False
partial_optimizer = optim.Adam(partial_params, lr=0.01)
But setting requires_grad = False this way affects both optimizers, not just partial_optimizer.
Is there any way I can do this?
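For reference, a quick check (just reusing the model above) seems to confirm that both optimizers end up holding the very same Parameter objects, so flipping requires_grad through one of them is visible to the other:

# model.parameters() yields the same Parameter objects on every call,
# so any requires_grad change is shared by everything that holds them
params_a = list(model.parameters())
params_b = list(model.parameters())
print(params_a[0] is params_b[0])  # True -> same underlying tensor

params_b[0].requires_grad = False
print(params_a[0].requires_grad)   # False -> the flag "leaks" across optimizers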
Why not build this functionality into the model?
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = nn.Conv2d(128, 128, (3, 3))
        self.conv2 = nn.Conv2d(128, 256, (3, 3))
        self.conv3 = nn.Conv2d(256, 256, (3, 3))
        self.partial_grad = False  # a flag: when True, only conv3 gets gradients

    def forward(self, x):
        if self.partial_grad:
            # run conv1/conv2 without tracking gradients, so backward()
            # only produces gradients for conv3
            with torch.no_grad():
                x = F.relu(self.conv1(x))
                x = F.relu(self.conv2(x))
        else:
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x
Now you can have a single optimizer with all the parameters, and you can switch model.partial_grad on and off according to your training data:
# full opt for DATA_X1
optimizer.zero_grad()
model.partial_grad = False  # prep for DATA_X1 training
x1, y1 = DATA_X1.item()  # not real code, but you get the point
out = model(x1)
loss = criterion(out, y1)
loss.backward()
optimizer.step()

# do a partial opt for DATA_X2
optimizer.zero_grad()
model.partial_grad = True  # prep for DATA_X2 training
x2, y2 = DATA_X2.item()  # not real code, but you get the point
out = model(x2)
loss = criterion(out, y2)
loss.backward()
optimizer.step()
Having a single optimizer should also be more beneficial, since its momentum and running statistics for the parameters are tracked consistently across both datasets.
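For completeness, here is a minimal runnable sketch of the loop above with dummy tensors; the batch size, input/target shapes, and MSE loss are assumptions, since the question doesn't specify the data or task:

import torch
import torch.optim as optim

# assumed setup: random tensors standing in for DATA_X1 / DATA_X2 batches;
# input matches conv1's 128 in-channels, targets match conv3's output shape
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

x1 = torch.randn(4, 128, 32, 32)  # DATA_X1 batch (assumed shape)
x2 = torch.randn(4, 128, 32, 32)  # DATA_X2 batch (assumed shape)
y1 = torch.randn(4, 256, 26, 26)  # 32 -> 30 -> 28 -> 26 after three unpadded 3x3 convs
y2 = torch.randn(4, 256, 26, 26)

# full update on DATA_X1: all three conv layers receive gradients
optimizer.zero_grad()
model.partial_grad = False
loss = criterion(model(x1), y1)
loss.backward()
optimizer.step()

# partial update on DATA_X2: conv1/conv2 run under no_grad,
# so backward() only produces gradients for conv3
optimizer.zero_grad()
model.partial_grad = True
loss = criterion(model(x2), y2)
loss.backward()
optimizer.step()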