Source code for nnabla_nas.contrib.classification.zoph.zoph

# Copyright (c) 2020 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import OrderedDict

import nnabla as nn
import nnabla.functions as F
import numpy as np

from nnabla_nas.contrib.classification.base import ClassificationModel as Model

from ....module import static as smo
from ....module.parameter import Parameter


[docs] class SepConv(smo.Graph): """ A static separable convolution (DepthWise conv + PointWise conv) Args: parents (list): a list of static modules that are parents to this module in_channels (:obj:`int`): Number of convolution kernels (which is equal to the number of input channels). out_channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5). pad (:obj:`tuple` of :obj:`int`, optional): Padding sizes for dimensions. Defaults to None. dilation (:obj:`tuple` of :obj:`int`, optional): Dilation sizes for dimensions. Defaults to None. with_bias (bool, optional): Specify whether to include the bias term. Defaults to `True`. name (string, optional): the name of the module """ def __init__(self, parents, in_channels, out_channels, kernel, pad, dilation, with_bias, name='', eval_prob=None): smo.Graph.__init__(self, parents=parents, name=name, eval_prob=eval_prob, ) # add DepthWiseConvolution dw_conv = smo.DwConv(name='{}/dwconv'.format(self.name), parents=self.parents, eval_prob=eval_prob, in_channels=in_channels, kernel=kernel, pad=pad, dilation=dilation, with_bias=with_bias, ) self.append(dw_conv) # add PointWisewConvolution conv = smo.Conv(name='{}/pwconv'.format(self.name), parents=[dw_conv], eval_prob=eval_prob, in_channels=in_channels, out_channels=out_channels, kernel=(1, 1), pad=None, group=1, with_bias=False, ) self.append(conv)
[docs] class SepConvBN(smo.Graph): """ Two static separable convolutions followed by batchnorm and relu at the end. Args: parents (list): a list of static modules that are parents to this module out_channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5). dilation (:obj:`tuple` of :obj:`int`, optional): Dilation sizes for dimensions. Defaults to None. name (string, optional): the name of the module """ def __init__(self, parents, out_channels, kernel, dilation, name='', eval_prob=None): smo.Graph.__init__(self, parents=parents, name=name, eval_prob=eval_prob) self._out_channels = out_channels if dilation is None: pad = tuple([ki//2 for ki in kernel]) else: pad = tuple([(ki//2)*di for ki, di in zip(kernel, dilation)]) self.append(SepConv(parents=parents, name='{}/SepConv_1'.format(self.name), in_channels=parents[0].shape[1], out_channels=out_channels, kernel=kernel, pad=pad, dilation=dilation, with_bias=False, eval_prob=eval_prob)) self.append(SepConv(parents=[self[-1]], name='{}/SepConv_2'.format(self.name), in_channels=out_channels, out_channels=out_channels, kernel=kernel, pad=pad, dilation=dilation, with_bias=False, eval_prob=eval_prob)) self.append(smo.BatchNormalization(parents=[self[-1]], n_features=self._out_channels, name='{}/bn'.format(self.name), n_dims=4)) self.append(smo.ReLU(parents=[self[-1]], name='{}/relu'.format(self.name)))
[docs] class SepConv3x3(SepConvBN): """ A static separable convolution of shape 3x3 that applies batchnorm and relu at the end. Args: parents (list): a list of static modules that are parents to this module channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): SepConvBN.__init__(self, parents=parents, out_channels=channels, kernel=(3, 3), dilation=None, name='{}_SepConv3x3'.format(name), eval_prob=eval_prob)
[docs] class SepConv5x5(SepConvBN): """ A static separable convolution of shape 5x5 that applies batchnorm and relu at the end. Args: parents (list): a list of static modules that are parents to this module channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): SepConvBN.__init__(self, parents=parents, out_channels=channels, kernel=(5, 5), dilation=None, name='{}_SepConv5x5'.format(name), eval_prob=eval_prob)
[docs] class DilSepConv3x3(SepConvBN): """ A static dilated separable convolution of shape 3x3 that applies batchnorm and relu at the end. Args: parents (list): a list of static modules that are parents to this module channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): SepConvBN.__init__(self, parents=parents, out_channels=channels, kernel=(3, 3), dilation=(2, 2), name='{}_DilSepConv3x3'.format(name), eval_prob=eval_prob)
[docs] class DilSepConv5x5(SepConvBN): """ A static dilated separable convolution of shape 5x5 that applies batchnorm and relu at the end. Args: parents (list): a list of static modules that are parents to this module channels (:obj:`int`): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16. name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): SepConvBN.__init__(self, parents=parents, out_channels=channels, kernel=(5, 5), dilation=(2, 2), name='{}_DilSepConv5x5'.format(name), eval_prob=eval_prob)
[docs] class MaxPool3x3(smo.Graph): """ A static max pooling of size 3x3 followed by batch normalization and ReLU Args: parents (list): a list of static modules that are parents to this module channels (int): the number of features name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): smo.Graph.__init__(self, parents=parents, name=name, eval_prob=eval_prob, ) pool = smo.MaxPool(name='{}_MaxPool3x3/maxpool'.format(name), parents=parents, eval_prob=eval_prob, kernel=(3, 3), stride=(1, 1), pad=(1, 1), ) self.append(pool) bn = smo.BatchNormalization(name='{}_MaxPool3x3/bn'.format(name), parents=[pool], eval_prob=eval_prob, n_features=channels, n_dims=4, ) self.append(bn) relu = smo.ReLU(name='{}_MaxPool3x3/relu'.format(name), parents=[bn], eval_prob=eval_prob ) self.append(relu)
[docs] class AveragePool3x3(smo.Graph): """ A static average pooling of size 3x3 followed by batch normalization and ReLU Args: parents (list): a list of static modules that are parents to this module channels (int): the number of features name (string, optional): the name of the module """ def __init__(self, parents, channels, name='', eval_prob=None): smo.Graph.__init__(self, parents=parents, name=name, eval_prob=eval_prob, ) pool = smo.AvgPool(name='{}_AveragePool3x3/avgpool'.format(name), parents=parents, eval_prob=eval_prob, kernel=(3, 3), stride=(1, 1), pad=(1, 1), ) self.append(pool) bn = smo.BatchNormalization(name='{}_AveragePool3x3/bn'.format(name), parents=[pool], eval_prob=eval_prob, n_features=channels, n_dims=4, ) self.append(bn) relu = smo.ReLU(name='{}_AveragePool3x3/relu'.format(name), parents=[bn], eval_prob=eval_prob ) self.append(relu)
ZOPH_CANDIDATES = [SepConv3x3, SepConv5x5, DilSepConv3x3, DilSepConv5x5, MaxPool3x3, AveragePool3x3, smo.Identity, smo.Zero]
[docs] class ZophBlock(smo.Graph): """ A zoph block as defined in [Bender et. al] Args: parents (list): a list of static modules that are parents to this module name (string, optional): the name of the module candidates (list): the candidate modules instantiated within this block (e.g. ZOPH_CANDIDATES) channels (int): the number of output channels of this block join_parameters (nnabla variable, optional): the architecture parameters used to join the outputs of the candidate modules. join_parameters must have the same number of elements as we have candidates. References: Bender, Gabriel. "Understanding and simplifying one-shot architecture search." (2019). """ def __init__(self, parents, candidates, channels, name='', join_parameters=None): self._candidates = candidates self._channels = channels if join_parameters is None: self._join_parameters = Parameter(shape=(len(candidates),)) else: self._join_parameters = join_parameters smo.Graph.__init__(self, parents=parents, name=name) join_prob = F.softmax(self._join_parameters) # add an input concatenation input_con = smo.Merging(name='{}/input_con'.format(self.name), parents=self.parents, mode='concat', axis=1, eval_prob=F.sum(join_prob[:-1])) self.append(input_con) input_conv = smo.Conv(name='{}/input_conv'.format(self.name), parents=[input_con], in_channels=input_con.shape[1], out_channels=self._channels, kernel=(1, 1), eval_prob=F.sum(join_prob[:-1])) self.append(input_conv) for i, ci in enumerate(self._candidates): self.append(ci(name='{}/candidate_{}'.format(self.name, i), parents=[input_conv], channels=self._channels, eval_prob=join_prob[i])) self.append(smo.Join(name='{}/join'.format(self.name), parents=self[2:], join_parameters=self._join_parameters))
[docs] class ZophCell(smo.Graph): """ A zoph cell that consists of multiple zoph blocks, as defined in [Bender et. al] Args: parents (list): a list of static modules that are parents to this module name (string, optional): the name of the module candidates (list): the candidate modules instantiated within this block (e.g. ZOPH_CANDIDATES) channels (int): the number of output channels of this block join_parameters (list of nnabla variable, optional): lift of the architecture parameters used to join the outputs of the candidate modules. Each element in join_parameters must have the same number of elements as we have candidates. The length of this list must be n_modules. References: Bender, Gabriel. "Understanding and simplifying one-shot architecture search." (2019). """ def __init__(self, parents, candidates, channels, name='', n_modules=3, reducing=False, join_parameters=[None]*3): self._candidates = candidates self._channels = channels self._n_modules = n_modules self._reducing = reducing self._join_parameters = join_parameters smo.Graph.__init__(self, parents=parents, name=name) # match the input dimensions shapes = [(list(ii.shape) + 4 * [1])[:4] for ii in self.parents] min_shape = np.min(np.array(shapes), axis=0) self._shape_adaptation = {i: np.array(si[2:]) / min_shape[2:] for i, si in enumerate(shapes) if tuple(si[2:]) != tuple(min_shape[2:])} # perform the input channel projection, using pointwise convolutions projected_inputs = [] for i, ii in enumerate(self.parents): self.append(smo.Conv(name='{}/input_conv_{}'.format(self.name, i), parents=[ii], in_channels=ii.shape[1], out_channels=self._channels, kernel=(1, 1), with_bias=False)) self.append(smo.BatchNormalization(name='{}/input_bn_{}'.format( self.name, i), parents=[self[-1]], n_dims=4, n_features=self._channels)) self.append(smo.ReLU(name='{}/input_conv_{}_relu'.format( self.name, i), parents=[self[-1]])) projected_inputs.append(self[-1]) # perform shape adaptation, using pooling, if needed for i, pii in enumerate(projected_inputs): if i in self._shape_adaptation: self.append(smo.MaxPool(name='{}/shape_adapt' '_pool_{}'.format(self.name, i), parents=[pii], kernel=self._shape_adaptation[i], stride=self._shape_adaptation[i])) projected_inputs[i] = self[-1] if self._reducing: for i, pii in enumerate(projected_inputs): self.append(smo.MaxPool(name='{}/reduce' '_pool_{}'.format(self.name, i), parents=[pii], kernel=(2, 2), stride=(2, 2))) projected_inputs[i] = self[-1] cell_modules = projected_inputs for i in range(self._n_modules): self.append(ZophBlock(name='{}/zoph' '_block_{}'.format(self.name, i), parents=cell_modules[:i+2], candidates=self._candidates, channels=self._channels, join_parameters=self._join_parameters[i])) cell_modules.append(self[-1]) # perform output concatenation self.append(smo.Merging(name=self.name+'/output_concat', parents=cell_modules, mode='concat'))
[docs] class SearchNet(Model, smo.Graph): """ A search space as defined in [Bender et. al] Args: name (string, optional): the name of the module input_shape (tuple): the shape of the network input n_classes (int): the number of output classes stem_channels (int): the number of channels for the stem convolutions cells (list): the type of the cells used within this search space cell_depth (list): the number of modules within each cell reducing (list): specifies for each cell if it reduces the feature map dimensions through pooling join_parameters (list): the join_parameters used in each cell and block. candidates (list, optional): the candidate modules instantiated within this block (e.g. ZOPH_CANDIDATES) mode (string): the mode which the join modules within this network use References: Bender, Gabriel. "Understanding and simplifying one-shot architecture search." (2019). """ def __init__(self, name='', input_shape=(3, 32, 32), n_classes=10, stem_channels=128, cells=[ZophCell]*3, cell_depth=[7]*3, cell_channels=[128, 256, 512], reducing=[False, True, True], join_parameters=[[None]*7]*3, candidates=ZOPH_CANDIDATES, mode='sample'): smo.Graph.__init__(self, parents=[], name=name) self._n_classes = n_classes self._stem_channels = stem_channels self._cells = cells self._cell_depth = cell_depth self._cell_channels = cell_channels self._join_parameters = join_parameters self._reducing = reducing self._candidates = candidates self._input_shape = (1,) + input_shape self._input = smo.Input( name='{}/input'.format(self.name), value=nn.Variable(self._input_shape)) self._mode = mode # 1. add the stem convolutions self.append(smo.Conv(name='{}/stem' '_conv_1'.format(self.name), parents=[self._input], in_channels=self._input.shape[1], out_channels=self._stem_channels, kernel=(7, 7), pad=(3, 3))) self.append(smo.BatchNormalization(name='{}/stem_bn'.format(self.name), parents=[self[-1]], n_dims=4, n_features=self._stem_channels)) self.append(smo.ReLU(name='{}/stem_relu'.format(self.name), parents=[self[-1]])) self.append(smo.Conv(name='{}/stem' '_conv_2'.format(self.name), parents=[self[-1]], in_channels=self._stem_channels, out_channels=self._stem_channels, kernel=(3, 3), pad=(1, 1))) self.append(smo.BatchNormalization(name='{}/stem2_bn'.format( self.name), parents=[self[-1]], n_dims=4, n_features=self._stem_channels)) self.append(smo.ReLU(name='{}/stem2_relu'.format(self.name), parents=[self[-1]])) # add the first 2 cells self.append(self._cells[0](name='{}/cell_{}'.format(self.name, 0), parents=[self[3], self[6]], candidates=self._candidates, n_modules=self._cell_depth[0], channels=self._cell_channels[0], join_parameters=self._join_parameters[0], reducing=self._reducing[0])) self.append(self._cells[1](name='{}/cell_{}'.format(self.name, 1), parents=[self[6], self[7]], candidates=self._candidates, n_modules=self._cell_depth[1], channels=self._cell_channels[1], join_parameters=self._join_parameters[1], reducing=self._reducing[1])) # 2. add the cells using shared architecture parameters for i, celli in enumerate(zip(self._cells[2:], self._cell_depth[2:], self._cell_channels[2:], self._join_parameters[2:], self._reducing[2:])): self.append(celli[0](name='{}/cell_{}'.format(self.name, i+2), parents=self[-2:], candidates=self._candidates, n_modules=celli[1], channels=celli[2], join_parameters=celli[3], reducing=celli[4])) # 3. add output convolutions and global average pooling layers self.append(smo.Conv(name='{}/output_conv_1'.format(self.name), parents=[self[-1]], in_channels=self[-1].shape[1], out_channels=self._n_classes, kernel=(1, 1))) self.append(smo.BatchNormalization(name='{}/output_bn'.format( self.name), parents=[self[-1]], n_dims=4, n_features=self._n_classes)) self.append(smo.ReLU(name='{}/output_relu'.format(self.name), parents=[self[-1]])) self.append(smo.GlobalAvgPool( name='{}/global_average_pool'.format(self.name), parents=[self[-1]])) self.append(smo.Collapse(name='{}/output_reshape'.format(self.name), parents=[self[-1]])) for mi in self.get_arch_modules(): mi.mode = self._mode @property def modules_to_profile(self): r"""Returns a list with the modules that will be profiled when the Profiler functions are called. All other modules in the network will not be profiled """ return [smo.Conv, smo.DwConv, smo.MaxPool, smo.AvgPool, smo.GlobalAvgPool, smo.ReLU, smo.BatchNormalization, smo.Join, smo.Merging, smo.Collapse, ] @property def input_shapes(self): return [self._input.shape]
[docs] def get_arch_modules(self): ans = [] for name, module in self.get_modules(): if isinstance(module, smo.Join): ans.append(module) return ans
[docs] def get_net_modules(self, active_only=False): ans = [] for name, module in self.get_modules(): if isinstance(module, smo.Module) and not isinstance(module, smo.Join): if active_only: if module._value is not None: ans.append(module) else: pass else: ans.append(module) return ans
[docs] def get_net_parameters(self, grad_only=False): param = OrderedDict() for key, val in self.get_parameters(grad_only).items(): if 'join' not in key: param[key] = val return param
[docs] def get_arch_parameters(self, grad_only=False): param = OrderedDict() for key, val in self.get_parameters(grad_only).items(): if 'join' in key: param[key] = val return param
def __call__(self, input): self.reset_value() self._input._value = input return self._recursive_call()
[docs] def summary(self): r"""Summary of the model.""" str_summary = '' for mi in self.get_arch_modules(): mi._sel_p.forward() str_summary += mi.name + "/" str_summary += mi.parents[np.argmax(mi._join_parameters.d)].name str_summary += "/" + str(np.max(mi._sel_p.d)) + "\n" str_summary += "Instantiated modules are:\n" for mi in self.get_net_modules(active_only=True): if isinstance(mi, smo.Module): try: mi._eval_prob.forward() except Exception: pass str_summary += mi.name + " chosen with probability " str_summary += str(mi._eval_prob.d) + "\n" return str_summary
[docs] def save_graph(self, path): """ save whole network/graph (in a PDF file) Args: path """ gvg = self.get_gv_graph() gvg.render(path + '/graph')
[docs] class TrainNet(SearchNet): """ A search space as defined in [Bender et. al]. Its the same as SearchNet, just that mode is fixed to 'max'. Args: name (string, optional): the name of the module input_shape (tuple): the shape of the network input n_classes (int): the number of output classes stem_channels (int): the number of channels for the stem convolutions cells (list): the type of the cells used within this search space cell_depth (list): the number of modules within each cell reducing (list): specifies for each cell if it reduces the feature map dimensions through pooling join_parameters (list): the join_parameters used in each cell and block candidates (list, optional): the candidate modules instantiated within this block (e.g. ZOPH_CANDIDATES) mode (string): the mode which the join modules within this network use References: Bender, Gabriel. "Understanding and simplifying one-shot architecture search." (2019). """ def __init__(self, name, input_shape=(3, 32, 32), n_classes=10, stem_channels=128, cells=[ZophCell]*3, cell_depth=[7]*3, cell_channels=[128, 256, 512], reducing=[False, True, True], join_parameters=[[None]*7]*3, candidates=ZOPH_CANDIDATES, param_path=None, *args, **kwargs): SearchNet.__init__(self, name=name, input_shape=input_shape, n_classes=n_classes, stem_channels=stem_channels, cells=cells, cell_depth=cell_depth, reducing=reducing, join_parameters=join_parameters, candidates=ZOPH_CANDIDATES, mode='max') if param_path is not None: self.load_parameters(param_path)