多模态 | 基于GNN的多模态情感识别技术COGMEN项目复现

COGMEN: COntextualized GNN based Multimodal Emotion recognitioN

COGMEN: 基于GNN的多模态情感识别技术

Paper:  https://arxiv.org/abs/2205.02455 

源代码:GitHub - Exploration-Lab/COGMEN

论文翻译及总结可参考我另外一篇博文:多模态 |COGMEN: COntextualized GNN based Multimodal Emotion recognitioN论文详解_夏天|여름이다的博客-CSDN博客

环境设置

  • Pytorch
  • python3.7版本及以上

如果安装GPU版本,cuda版本需要11.7及以上

  • pytorch_geometric

Installation — pytorch_geometric documentation (pytorch-geometric.readthedocs.io)

comet.ml

  •  SBERT

Install SBERT

 对于已经安装了pytorch的情况,我只执行了以下安装命令

pip install comet_ml --upgrade #使用默认的 Python,comet_ml升级到最新版本
pip install torch_geometric
pip install -U sentence-transformers

数据预处理

数据集:iemocap_4

对数据集进行处理,运行

python preprocess.py --dataset="iemocap_4"

运行后,结果如图

 开始训练

python train.py --dataset="iemocap_4" --modalities="atv" --from_begin --epochs=55

训练效果如图

训练后,生成model_checkpoints目录,如下

 进行验证

python eval.py --dataset="iemocap_4" --modalities="atv"

结果如图

代码详解ING

COGMEN/cogmen/model/COGMEN.py

import torch
import torch.nn as nn

from .SeqContext import SeqContext
from .GNN import GNN
from .Classifier import Classifier
from .functions import batch_graphify
import cogmen

# Module-level logger obtained from cogmen.utils.get_logger(); used by COGMEN
# below to emit the edge-type table at debug level.
log = cogmen.utils.get_logger()

#定义网络模型
class COGMEN(nn.Module):
    """Emotion classifier chaining a sequence encoder, a graph network and a classifier.

    The constructor reads these fields from ``args``: ``rnn``, ``hidden_size``,
    ``dataset``, ``emotion``, ``concat_gin_gout``, ``wp``, ``wf``, ``device``
    and ``gnn_nheads``. It also writes ``args.n_speakers`` before building the
    sub-modules, which all receive the same ``args`` object.
    """

    def __init__(self, args):
        super().__init__()

        hidden = args.hidden_size
        utterance_dim = 100
        # Handed to SeqContext as its second size argument and to GNN as its
        # first; follows the hidden size only for the transformer encoder.
        context_dim = hidden if args.rnn == "transformer" else 200

        # Number of speakers per supported dataset.
        speakers_by_dataset = {"iemocap": 2, "iemocap_4": 2, "mosei": 1}

        # Label-name -> class-index tables per supported dataset.
        labels_by_dataset = {
            "iemocap": {"hap": 0, "sad": 1, "neu": 2, "ang": 3, "exc": 4, "fru": 5},
            "iemocap_4": {"hap": 0, "sad": 1, "neu": 2, "ang": 3},
            "mosei": {"Negative": 0, "Positive": 1},
        }
        # NOTE(review): ``args.dataset`` is only tested for truthiness here, so
        # the six-emotion table replaces the MOSEI entry whenever the emotion
        # mode is "multilabel". Other datasets are unaffected because only the
        # selected dataset's entry is looked up below.
        if args.dataset and args.emotion == "multilabel":
            labels_by_dataset["mosei"] = {
                "happiness": 0,
                "sadness": 1,
                "anger": 2,
                "surprise": 3,
                "disgust": 4,
                "fear": 5,
            }

        # An unsupported dataset name raises KeyError on either lookup.
        num_classes = len(labels_by_dataset[args.dataset])
        args.n_speakers = speakers_by_dataset[args.dataset]

        self.concat_gin_gout = args.concat_gin_gout
        self.wp = args.wp
        self.wf = args.wf
        self.device = args.device

        # Sub-modules are created in this fixed order: rnn, gcn, clf.
        self.rnn = SeqContext(utterance_dim, context_dim, args)
        self.gcn = GNN(context_dim, hidden, hidden, args)

        # The classifier input is widened by the graph-input width when the
        # graph input is concatenated with the graph output.
        clf_in_dim = hidden * args.gnn_nheads
        if args.concat_gin_gout:
            clf_in_dim = context_dim + clf_in_dim
        self.clf = Classifier(clf_in_dim, hidden, num_classes, args)

        # One index per key "<speaker j><speaker k><0|1>", numbered in
        # insertion order.
        edge_types = {}
        for src in range(args.n_speakers):
            for dst in range(args.n_speakers):
                for flag in ("0", "1"):
                    edge_types[f"{src}{dst}{flag}"] = len(edge_types)
        self.edge_type_to_idx = edge_types
        log.debug(self.edge_type_to_idx)

    def get_rep(self, data):
        """Encode a batch and run it through the graph network.

        Reads ``data["text_len_tensor"]``, ``data["input_tensor"]`` and
        ``data["speaker_tensor"]``.

        Returns:
            ``(graph_out, features)``: the GNN output and the node features
            returned by ``batch_graphify`` that were fed into it.
        """
        lengths = data["text_len_tensor"]
        # Original author's shape note: [batch_size, mx_len, D_g]
        encoded = self.rnn(lengths, data["input_tensor"])
        graph_inputs = batch_graphify(
            encoded,
            lengths,
            data["speaker_tensor"],
            self.wp,
            self.wf,
            self.edge_type_to_idx,
            self.device,
        )
        # The fourth element (edge index lengths) is not used here.
        features, edge_index, edge_type, _ = graph_inputs

        return self.gcn(features, edge_index, edge_type), features

    def forward(self, data):
        """Return the classifier output for the batch ``data``."""
        graph_out, features = self.get_rep(data)
        clf_input = (
            torch.cat([features, graph_out], dim=-1)
            if self.concat_gin_gout
            else graph_out
        )
        return self.clf(clf_input, data["text_len_tensor"])

    def get_loss(self, data):
        """Return the classifier loss for ``data`` against ``data["label_tensor"]``."""
        graph_out, features = self.get_rep(data)
        clf_input = (
            torch.cat([features, graph_out], dim=-1)
            if self.concat_gin_gout
            else graph_out
        )
        return self.clf.get_loss(
            clf_input, data["label_tensor"], data["text_len_tensor"]
        )

COGMEN/cogmen/Optim.py

import torch.optim as optim
from torch.nn.utils import clip_grad_value_
from torch.optim import lr_scheduler


#定义优化器类
class Optim:
    """Builds a torch optimizer by name and steps it with optional gradient clipping.

    Args:
        lr: Learning rate passed to the optimizer.
        max_grad_value: Gradients are clamped to ``[-max_grad_value,
            max_grad_value]`` before each step; ``-1`` disables clipping.
        weight_decay: Weight decay passed to the optimizer.
    """

    def __init__(self, lr, max_grad_value, weight_decay):
        self.lr = lr
        self.max_grad_value = max_grad_value
        self.weight_decay = weight_decay
        # Both are populated by set_parameters().
        self.params = None
        self.optimizer = None

    def set_parameters(self, params, name):
        """Store ``params`` and create the optimizer selected by ``name``.

        Args:
            params: Iterable of parameters to optimize; materialized to a list
                so it can be reused for gradient clipping in ``step``.
            name: One of ``"sgd"``, ``"rmsprop"``, ``"adam"``, ``"adamw"``.

        Raises:
            ValueError: If ``name`` is not a supported optimizer. Previously
                this case silently left ``self.optimizer`` as ``None`` and
                only failed later inside ``step``.
        """
        self.params = list(params)
        optimizer_classes = {
            "sgd": optim.SGD,
            "rmsprop": optim.RMSprop,
            "adam": optim.Adam,
            "adamw": optim.AdamW,
        }
        if name not in optimizer_classes:
            raise ValueError(
                "Unknown optimizer {!r}; expected one of {}".format(
                    name, sorted(optimizer_classes)
                )
            )
        self.optimizer = optimizer_classes[name](
            self.params, lr=self.lr, weight_decay=self.weight_decay
        )

    def get_scheduler(self, sch):
        """Return a learning-rate scheduler bound to the current optimizer.

        Args:
            sch: ``"reduceLR"`` for ``ReduceLROnPlateau`` in ``"min"`` mode, or
                ``"expLR"`` for ``ExponentialLR`` with ``gamma=0.9``.

        Raises:
            ValueError: If ``sch`` is not a supported scheduler name.
        """
        print("Using Scheduler")
        if sch == "reduceLR":
            return lr_scheduler.ReduceLROnPlateau(self.optimizer, "min")
        if sch == "expLR":
            # Must be qualified: only the ``lr_scheduler`` module is imported,
            # so a bare ``ExponentialLR`` would raise NameError.
            return lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)
        raise ValueError(
            "Unknown scheduler {!r}; expected 'reduceLR' or 'expLR'".format(sch)
        )

    def step(self):
        """Clip gradients by value (unless disabled with -1), then step the optimizer."""
        if self.max_grad_value != -1:
            clip_grad_value_(self.params, self.max_grad_value)
        self.optimizer.step()

    def load_state_dict(self, state_dict):
        """Restore the wrapped optimizer's state from ``state_dict``."""
        self.optimizer.load_state_dict(state_dict)

COGMEN/cogmen/Dataset.py

#导入SBERT
from sentence_transformers import SentenceTransformer

# Load the pretrained SBERT model "paraphrase-distilroberta-base-v1" once, at
# import time; Sample below uses it to encode each sentence.
sbert_model = SentenceTransformer("paraphrase-distilroberta-base-v1")

#定义样本类:声音编号,人员编号,标签,文本,语音,视觉,通过sbert编码后的向量值
class Sample:
    """A single record holding its identifiers, label, modality fields and sentence.

    On construction the sentence is also encoded with the module-level
    ``sbert_model`` and the result is kept in ``sbert_sentence_embeddings``.
    """

    def __init__(self, vid, speaker, label, text, audio, visual, sentence):
        # Identification and target.
        self.vid, self.speaker, self.label = vid, speaker, label
        # Per-modality fields, stored exactly as given.
        self.text, self.audio, self.visual = text, audio, visual
        # Raw sentence plus its SBERT encoding.
        self.sentence = sentence
        embedding = sbert_model.encode(sentence)
        self.sbert_sentence_embeddings = embedding

猜你喜欢

转载自blog.csdn.net/weixin_44649780/article/details/129927025