# Source code for nnabla_nas.contrib.classification.mobilenet.network

# Copyright (c) 2020 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import Counter
from collections import OrderedDict
import os

import nnabla.functions as F
import numpy as np

from .... import module as Mo
from ..base import ClassificationModel as Model
from .helper import visualize_mobilenet_arch
from .modules import CANDIDATES
from .modules import ChoiceBlock
from .modules import ConvBNReLU


def _make_divisible(x, divisible_by=8):
    r"""It ensures that all layers have a channel number that is divisible by
    divisible_by."""
    return int(np.ceil(x * 1. / divisible_by) * divisible_by)



[docs]
def label_smoothing_loss(pred, label, label_smoothing=0.1):
    r"""Softmax cross entropy with optional label smoothing.

    Args:
        pred: The predictions, passed to `F.softmax_cross_entropy` and
            `F.log_softmax`.
        label: The targets, passed to `F.softmax_cross_entropy`.
        label_smoothing (float, optional): The smoothing factor. A value
            that is `<= 0` disables smoothing. Defaults to 0.1.

    Returns:
        The plain softmax cross entropy if smoothing is disabled. Otherwise
        `(1 - label_smoothing) * cross_entropy` minus `label_smoothing`
        times the mean of the log-softmax of `pred` over axis 1.
    """
    cross_entropy = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return cross_entropy

    weighted_ce = (1 - label_smoothing) * cross_entropy
    # the mean keeps axis 1 as a singleton dimension (keepdims=True)
    mean_log_prob = F.mean(F.log_softmax(pred), axis=1, keepdims=True)
    return weighted_ce - label_smoothing * mean_log_prob




[docs]
class SearchNet(Model):
    r"""MobileNet V2 search space.

    This implementation is based on the PyTorch implementation.

    Args:
        num_classes (int): Number of classes
        width_mult (float, optional): Width multiplier - adjusts number of
            channels in each layer by this amount
        settings (list, optional): Network structure. Defaults to None.
        drop_rate (float, optional): Drop rate used in Dropout. Defaults to 0.
        candidates (list of str, optional): A list of candidates. Defaults to
            None.
        skip_connect (bool, optional): Whether the skip connect is used.
            Defaults to `True`.

    References:
        Sandler, M., Howard, A., Zhu, M., Zhmoginov, A. and Chen, L.C., 2018.
            Mobilenetv2: Inverted residuals and linear bottlenecks. In
            Proceedings of the IEEE conference on computer vision and pattern
            recognition (pp. 4510-4520).
    """

    def __init__(self,
                 name='',
                 num_classes=1000,
                 width_mult=1,
                 settings=None,
                 drop_rate=0,
                 candidates=None,
                 mode='sample',
                 skip_connect=True):
        r"""Builds the feature extractor and the classifier.

        Args:
            name (str, optional): The module name. Defaults to ''.
            num_classes (int, optional): Number of classes. Defaults to 1000.
            width_mult (float, optional): Multiplier applied to the number
                of channels of each layer. Defaults to 1.
            settings (list, optional): One `[c, n, s]` entry per stage
                (channels, number of blocks, stride of the first block).
                Defaults to None, which selects the built-in structure.
            drop_rate (float, optional): Drop rate used in Dropout.
                Defaults to 0.
            candidates (list of str, optional): Names of the candidate
                operations of each choice block. Defaults to None, which
                selects the built-in list.
            mode (str, optional): Forwarded to each `ChoiceBlock`.
                Defaults to 'sample'.
            skip_connect (bool, optional): Whether 'skip_connect' is added
                as a candidate where the block keeps stride and channels.
                Defaults to `True`.
        """
        Mo.Module.__init__(self, name=name)

        # all channel numbers are rounded up to a multiple of this value
        round_nearest = 8

        # resolve the default network structure and candidate operations
        if settings is None:
            settings = [
                # c, n, s
                [24, 4, 2],
                [32, 4, 2],
                [64, 4, 2],
                [96, 4, 1],
                [160, 4, 2],
                [320, 1, 1]
            ]
        if candidates is None:
            candidates = [
                "MB3 3x3",
                "MB6 3x3",
                "MB3 5x5",
                "MB6 5x5",
                "MB3 7x7",
                "MB6 7x7"
            ]

        self._num_classes = num_classes
        self._width_mult = width_mult
        self._skip_connect = skip_connect
        self._arch_idx = None  # keeps current max arch
        self.last_channel = _make_divisible(
            1280 * max(1.0, width_mult),
            round_nearest
        )
        self._settings = settings
        self._candidates = candidates

        # building first layer
        stem_width = _make_divisible(32 * width_mult, round_nearest)
        first_cell_width = _make_divisible(16 * width_mult, 8)
        features = [
            ConvBNReLU(3, stem_width, stride=(2, 2), name='/init_conv'),
            CANDIDATES['MB1 3x3'](
                stem_width, first_cell_width, 1, '/init_block'),
        ]
        in_channels = first_cell_width

        # building inverted residual blocks
        for width, repeats, first_stride in settings:
            out_channels = _make_divisible(width * width_mult, round_nearest)
            for i in range(repeats):
                # only the first block of a stage uses the stage stride
                stride = 1 if i else first_stride
                block_ops = candidates.copy()
                # skipping is only offered when the block keeps the shape
                keeps_shape = stride == 1 and in_channels == out_channels
                if skip_connect and keeps_shape:
                    block_ops.append('skip_connect')
                block = ChoiceBlock(in_channels, out_channels,
                                    stride=stride, mode=mode,
                                    ops=block_ops,
                                    name='/res_block_{}'.format(i))
                features.append(block)
                in_channels = out_channels

        # building last several layers
        final_conv = ConvBNReLU(in_channels, self.last_channel,
                                kernel=(1, 1), name='/final_conv')
        features.append(final_conv)
        self._features = Mo.Sequential(*features)

        # building classifier
        self._classifier = Mo.Sequential(
            Mo.GlobalAvgPool(name='/final_avgpool'),
            Mo.Dropout(drop_rate, name='/final_dropout'),
            Mo.Linear(self.last_channel, num_classes, name='/final_affine'),
        )

    @property
    def modules_to_profile(self):
        r"""The module classes that are profiled by the Profiler functions.

        Modules of any other class in the network are not profiled.

        Returns:
            list: A new list of module classes.
        """
        profiled = [
            Mo.Conv,
            Mo.BatchNormalization,
            Mo.ReLU6,
            Mo.GlobalAvgPool,
            Mo.Add2,
            Mo.Linear,
        ]
        return profiled


[docs]
    def get_net_parameters(self, grad_only=False):
        r"""Returns an `OrderedDict` containing model parameters.

        These are all parameters whose name does not contain 'alpha'.

        Args:
            grad_only (bool, optional): If sets to `True`, then only parameters
                with `need_grad=True` are returned. Defaults to False.

        Returns:
            OrderedDict: A dictionary containing parameters.
        """
        net_params = OrderedDict()
        for key, value in self.get_parameters(grad_only).items():
            if 'alpha' in key:
                # architecture parameter, not a model weight
                continue
            net_params[key] = value
        return net_params



[docs]
    def get_arch_parameters(self, grad_only=False):
        r"""Returns an `OrderedDict` containing architecture parameters.

        These are all parameters whose name contains 'alpha'.

        Args:
            grad_only (bool, optional): If sets to `True`, then only parameters
                with `need_grad=True` are returned. Defaults to False.

        Returns:
            OrderedDict: A dictionary containing parameters.
        """
        all_params = self.get_parameters(grad_only)
        return OrderedDict(
            (key, value) for key, value in all_params.items()
            if 'alpha' in key
        )



[docs]
    def call(self, input):
        r"""Applies the feature extractor followed by the classifier.

        Args:
            input: The input passed to the feature extractor.

        Returns:
            The output of the classifier.
        """
        features = self._features(input)
        output = self._classifier(features)
        return output



[docs]
    def extra_repr(self):
        r"""Returns the configuration of the network as `key=value` pairs.

        Returns:
            str: The pairs, separated by ', '.
        """
        fields = (
            ('num_classes', self._num_classes),
            ('width_mult', self._width_mult),
            ('settings', self._settings),
            ('candidates', self._candidates),
            ('skip_connect', self._skip_connect),
        )
        return ', '.join(f'{key}={value}' for key, value in fields)



[docs]
    def summary(self):
        r"""Returns a text summary of the currently preferred architecture.

        For every architecture parameter the operation with the largest
        value is selected. The summary has one row per stage of the
        settings, listing the selected operation of each block of that
        stage. An operation is marked with `*` if it differs from the one
        selected at the previous call. The summary ends with the share of
        each operation and, from the second call on, the number of changes.

        The selected indices are stored in `self._arch_idx` for the
        comparison at the next call.

        Returns:
            str: The summary.
        """
        arch_params = self.get_arch_parameters()
        arch_idx = [np.argmax(m.d.flat) for m in arch_params.values()]
        prev_idx = self._arch_idx

        op_names = self._candidates.copy()
        if self._skip_connect:
            op_names += ['skip_connect']

        # NOTE(review): assumes the architecture parameters are ordered
        # block by block, following the stages of the settings.
        pieces = ['NET SUMMARY:\n']
        offset = 0  # index of the first block of the current stage
        for c, n, _ in self._settings:
            pieces.append('c={:<4} : '.format(c))
            # Stages may have different numbers of blocks, so the block
            # index must be accumulated; `k * n + i` is wrong in that case.
            for idx in range(offset, offset + n):
                changed = (prev_idx is not None and
                           arch_idx[idx] != prev_idx[idx])
                pieces.append('*' if changed else ' ')
                pieces.append('{:<30}; '.format(op_names[arch_idx[idx]]))
            pieces.append('\n')
            offset += n
        txt = ''.join(pieces)

        # share of each operation among all architecture parameters
        count = Counter(arch_idx)
        total = len(arch_params)
        stats = [
            name + f' = {count[k]/total*100:.2f}%\t'
            for k, name in enumerate(op_names)
        ]

        if prev_idx is not None:
            n_changes = sum(i != j for i, j in zip(arch_idx, prev_idx))
            txt += '\n Number of changes: {}({:.2f}%)\n'.format(
                n_changes, n_changes*100/len(arch_idx))
        self._arch_idx = arch_idx
        return txt + ''.join(stats)



[docs]
    def visualize(self, path):
        r"""Saves a visualization of the architecture.

        Nothing is saved unless the third feature module still holds a
        `Mo.MixedOp`.

        Args:
            path (str): The directory; the output is written to `arch`
                inside it.
        """
        # the first two feature modules are the initial convolution and the
        # initial block, so index 2 is the first module with a `_mixed` op
        first_mixed = self._features[2]._mixed
        if not isinstance(first_mixed, Mo.MixedOp):
            return
        # save the architectures
        visualize_mobilenet_arch(self, os.path.join(path, 'arch'))



[docs]
    def loss(self, outputs, targets, loss_weights=None):
        r"""Returns the mean label smoothing loss.

        Args:
            outputs (list): A list with exactly one prediction.
            targets (list): A list with exactly one target.
            loss_weights (optional): Not used. Defaults to None.

        Returns:
            The mean of `label_smoothing_loss` with its default smoothing.
        """
        assert len(outputs) == 1 and len(targets) == 1
        pred, label = outputs[0], targets[0]
        per_sample_loss = label_smoothing_loss(pred, label)
        return F.mean(per_sample_loss)



[docs]
    def get_net_modules(self, active_only=False):
        r"""Returns the modules of the network.

        Args:
            active_only (bool, optional): Defaults to False.
                NOTE(review): this flag currently has no effect; the same
                modules are returned for both values. Confirm whether a
                filter on active modules was intended.

        Returns:
            list: All modules from `get_modules()` that are instances of
                `Mo.Module`.
        """
        return [
            module for _, module in self.get_modules()
            if isinstance(module, Mo.Module)
        ]





[docs]
class TrainNet(SearchNet):
    r"""MobileNet V2 Train Net.

    The network is built like `SearchNet`. Afterwards every `ChoiceBlock`
    is reduced to a single operation: the one with the largest architecture
    parameter if `genotype` is given, a randomly picked one otherwise.

    Args:
        num_classes (int, optional): Number of classes. Defaults to 1000.
        width_mult (float, optional): Width multiplier - adjusts number of
            channels in each layer by this amount. Defaults to 1.
        settings (list, optional): Network structure.
            Defaults to None.
        drop_rate (float, optional): Drop rate used in Dropout.
            Defaults to 0.
        candidates (list of str, optional): A list of candidates. Defaults
            to None.
        mode (str, optional): The sampling strategy, forwarded to
            `SearchNet`. Defaults to 'sample'.
        skip_connect (bool, optional): Whether the skip connect is used.
            Defaults to `True`.
        genotype(str, optional): The path to architecture file, passed to
            `load_parameters`. Defaults to None.

    References:
        [1] Sandler, M., Howard, A., Zhu, M., Zhmoginov, A. and Chen, L.C.,
            2018. Mobilenetv2: Inverted residuals and linear bottlenecks. In
            Proceedings of the IEEE conference on computer vision and pattern
            recognition (pp. 4510-4520).
    """

    def __init__(self,
                 num_classes=1000,
                 width_mult=1,
                 settings=None,
                 drop_rate=0,
                 candidates=None,
                 mode='sample',
                 skip_connect=True,
                 genotype=None):

        super().__init__(num_classes=num_classes, width_mult=width_mult,
                         settings=settings, drop_rate=drop_rate,
                         candidates=candidates, mode=mode,
                         skip_connect=skip_connect)

        use_genotype = genotype is not None
        if use_genotype:
            # the architecture parameters must be loaded before they are read
            self.load_parameters(genotype)

        for _, module in self.get_modules():
            if not isinstance(module, ChoiceBlock):
                continue
            alpha = module._mixed._alpha.d
            if use_genotype:
                # keep the operation with the largest architecture parameter
                idx = np.argmax(alpha)
            else:
                # pick random model
                idx = np.random.randint(len(alpha))
            # replace the mixed operation by the single selected operation
            module._mixed = module._mixed._ops[idx]