TFS 源码分析 写文件操作 DataServer端

转自:http://blog.chinaunix.net/uid-20761674-id-75100.html

本文主要介绍dataserver端在写文件过程中的任务
 

1. DataServer处理client发送过来的CreateFilenameMessage消息,调用create_file_number函数,主要功能是获取file_number并返回给client

int DataService::create_file_number(CreateFilenameMessage* message)
    {
      uint32_t block_id = message->get_block_id();
      uint64_t file_id = message->get_file_id();

      TBSYS_LOG(DEBUG, "create file: blockid: %u, fileid: %" PRI64_PREFIX "u", block_id, file_id);
      uint64_t file_number = 0;

      //在这里获取file_number,file_number=++file_number_,filenumber_是一个递增变量
      int ret = data_management_.create_file(block_id, file_id,file_number);
      if (ret)
      {
        if (EXIT_NO_LOGICBLOCK_ERROR == ret) //need to update BlockInfo

        {
          TBSYS_LOG(ERROR, "create file: blockid: %u is lost. ask master to update.", block_id);
          if (TFS_SUCCESS !=ds_requester_.req_update_block_info(block_id, UPDATE_BLOCK_MISSING))
          {
            TBSYS_LOG(ERROR, "create file: blockid: %u is null. req update BlockInfo failed", block_id);
          }
        }
        return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
            "create file failed. blockid: %u, fileid: %" PRI64_PREFIX "u, ret: %d.", block_id, file_id, ret);
      }

      RespCreateFilenameMessage* resp_cfn_msg = newRespCreateFilenameMessage();
      resp_cfn_msg->set_block_id(block_id);
      resp_cfn_msg->set_file_id(file_id);
      resp_cfn_msg->set_file_number(file_number);
      message->reply_message(resp_cfn_msg);

      TBSYS_LOG(DEBUG,
          "create file successful. blockid: %u, fileid: %" PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u", block_id,
          file_id, file_number);
      return TFS_SUCCESS;
    }

2.  DS接收client发送的WriteDataMessage消息,调用write_data()函数

primary在本节点写入数据,同时向其他DS发送数据,DS在写完数据之后给primaryDS返回消息

int DataService::write_data(WriteDataMessage* message)
    {
      WriteDataInfo write_info = message->get_write_info();
     //注意lease_id和version在保证数据一致性上的作用

      int32_t lease_id = message->get_lease_id();
      int32_t version = message->get_block_version();
      char* msg_data = message->get_data();

      TBSYS_LOG(
          DEBUG,
          "write data start, blockid: %u, fileid: %" PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u, version: %u, leaseid: %u, isserver: %d\n",
          write_info.block_id_, write_info.file_id_,write_info.file_number_, version, lease_id, write_info.is_server_);

      UpdateBlockType repair = UPDATE_BLOCK_NORMAL;
     //在本节点写入数据(primary和非primaryDS)

      int ret = data_management_.write_data(write_info, lease_id, version,msg_data, repair);
      if (EXIT_NO_LOGICBLOCK_ERROR == ret)
      {
        return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
            "write data failed. block is not exist. blockid: %u, fileid: %" PRI64_PREFIX "u, ret: %d",
            write_info.block_id_, write_info.file_id_, ret);
      }
      else if (EXIT_BLOCK_DS_VERSION_ERROR == ret ||EXIT_BLOCK_NS_VERSION_ERROR == ret)
      {
        MessageFactory::send_error_message(
            message,
            TBSYS_LOG_LEVEL(ERROR),
            data_server_info_.id_,
            "write data failed. block version error. blockid: %u, fileid: %" PRI64_PREFIX "u, error ret: %d, repair: %d",
            write_info.block_id_, write_info.file_id_, ret, repair);
        if (TFS_SUCCESS !=ds_requester_.req_update_block_info(write_info.block_id_, repair))
        {
          TBSYS_LOG(ERROR, "req update block info failed. blockid: %u, repair: %d", write_info.block_id_, repair);
        }
        return TFS_SUCCESS;
      }
      else if (EXIT_DATAFILE_OVERLOAD == ret || EXIT_DATA_FILE_ERROR == ret)
      {
        if (Master_Server_Role == write_info.is_server_)
        {
          ds_requester_.req_block_write_complete(write_info.block_id_,lease_id, TFS_ERROR);
        }
        return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
            "write data fail. blockid: %u, fileid: %" PRI64_PREFIX "u. ret: %d", write_info.block_id_,
            write_info.file_id_, ret);
      }

      // if master ds, write data to other slave ds

      // == Write_Master_Server is master 

      //如果是primaryDS,向其他DS发送数据,非primaryDS则跳过这步

      if (Master_Server_Role == write_info.is_server_)
      {
        message->set_server(Slave_Server_Role);
        message->set_lease_id(lease_id);
        message->set_block_version(version);
        //向ds_list中的DS发送数据,这是一个异步post的过程

        ret = post_message_to_server(message, message->get_ds_list());
        if (ret >= 0)
        {
          if (== ret)
          {
            //no slave

            message->reply_message(newStatusMessage(STATUS_MESSAGE_OK));
          }
         //primaryDS在这里就返回了

          return TFS_SUCCESS;
        }
        else
        {
          ds_requester_.req_block_write_complete(write_info.block_id_,lease_id, EXIT_SENDMSG_ERROR);
          return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
              "write data fail to other dataserver (send): blockid: %u, fileid: %" PRI64_PREFIX "u, datalen: %d",
              write_info.block_id_, write_info.file_id_,write_info.length_);
        }
      }

      //master should not execute this statement, while salve will.

      //非primaryDS会执行到这里,给primaryDS返回信息

      message->reply_message(new StatusMessage(STATUS_MESSAGE_OK));

      TBSYS_LOG(
          DEBUG,
          "write data sucessful, blockid: %u, fileid: %" PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u, version: %u, leaseid: %u, isserver: %d\n",
          write_info.block_id_, write_info.file_id_,write_info.file_number_, version, lease_id, write_info.is_server_);
      return TFS_SUCCESS;
    }

3. DataServer处理client发送过来的CloseFileMessage消息

   PrimaryDS自己将存储在临时文件或内存中的数据写入真正block位置上,同时通知其他DS完成同样的操作,然后回复消息给NS,等待NS也对写入进行确认之后,primaryDS回复消息给client,整个写入过程完成.

int DataService::close_write_file(CloseFileMessage* message)
    {
      CloseFileInfo close_file_info = message->get_close_file_info();

      int32_t lease_id = message->get_lease_id();
      uint64_t peer_id = message->get_connection()->getPeerId();

      TBSYS_LOG(
          DEBUG,
          "close write file, blockid: %u, fileid: %" PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u, leaseid: %u, from: %s\n",
          close_file_info.block_id_, close_file_info.file_id_,close_file_info.file_number_, lease_id,
          tbsys::CNetUtil::addrToString(peer_id).c_str());

      int32_t write_file_size = 0;

      //这个过程比较复杂,大致是检查lease是否过期、从临时文件或临时内容中读出数据,写入到真正的block位置上
      int ret = data_management_.close_write_file(close_file_info,write_file_size);
      if (TFS_SUCCESS != ret)
      {
        if (EXIT_DATAFILE_EXPIRE_ERROR == ret)
        {
          return MessageFactory::send_error_message(
              message,
              TBSYS_LOG_LEVEL(ERROR),
              data_server_info_.id_,
              "datafile is null(maybe expired). blockid: %u, fileid: %"PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u, ret: %d",
              close_file_info.block_id_, close_file_info.file_id_,close_file_info.file_number_, ret);
        }
        else if (EXIT_NO_LOGICBLOCK_ERROR == ret)
        {
          return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
              "close write file failed. block is not exist. blockid: %u, fileid: %" PRI64_PREFIX "u, ret: %d",
              close_file_info.block_id_, close_file_info.file_id_, ret);
        }
        else if (TFS_SUCCESS != ret)
        {
          try_add_repair_task(close_file_info.block_id_, ret);
          if (CLOSE_FILE_SLAVER != close_file_info.mode_)
          {
            ds_requester_.req_block_write_complete(close_file_info.block_id_,lease_id, ret);
          }
          return MessageFactory::send_error_message(
              message,
              TBSYS_LOG_LEVEL(ERROR),
              data_server_info_.id_,
              "close write file error. blockid: %u, fileid : %"PRI64_PREFIX "u, filenumber: %" PRI64_PREFIX "u. ret: %d",
              close_file_info.block_id_, close_file_info.file_id_,close_file_info.file_number_, ret);
        }
      }

      BlockInfo* blk = NULL;
      int32_t visit_count = 0;
      ret = data_management_.get_block_info(close_file_info.block_id_, blk,visit_count);
      if (TFS_SUCCESS != ret)
      {
        return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
            "close write file failed. block is not exist. blockid: %u, fileid: %" PRI64_PREFIX "u, ret: %d",
            close_file_info.block_id_, close_file_info.file_id_, ret);
      }

      //if it is master DS. Send to other slave ds

      //primaryDS将关闭文件的消息发送给其他非primaryDS,执行前面的数据写入操作
      if (CLOSE_FILE_SLAVER != close_file_info.mode_)
      {
        do_stat(peer_id, write_file_size, write_file_size, 0,AccessStat::WRITE_BYTES);

        message->set_mode(CLOSE_FILE_SLAVER);
        message->set_block(blk);

        //这个是异步通信,使用条件等待(cond_.wait())直到收到所有DS发送回来的消息才返回
        int send_ret = send_message_to_slave_ds(message, message->get_ds_list());
        if (TFS_SUCCESS != send_ret)
        {
          // other ds failed, release lease
          ds_requester_.req_block_write_complete(close_file_info.block_id_,lease_id, TFS_ERROR);
          return MessageFactory::send_error_message(message,TBSYS_LOG_LEVEL(ERROR), data_server_info_.id_,
              "close write file to other dataserver fail, blockid: %u, fileid: %" PRI64_PREFIX "u, send_ret: %d",
              close_file_info.block_id_, close_file_info.file_id_,send_ret);
        }
        else
        {
          //commit
          //提交writeCommit消息给NS。NS更新相关元数据信息,回确认消息给primaryDS
          //再由primaryDS回消息给client,整个写流程结束

          //req_block_write_complete给NS发送wirteCommmit消息
          int ret_code =ds_requester_.req_block_write_complete(close_file_info.block_id_, lease_id, TFS_SUCCESS);
          if (TFS_SUCCESS == ret_code)
          {
            //sync to mirror
            int option_flag = message->get_option_flag();
            if (== (option_flag & TFS_FILE_NO_SYNC_LOG))
            {
              TBSYS_LOG(INFO, " write sync log, blockid: %u, fileid: %"PRI64_PREFIX "u", close_file_info.block_id_,
                  close_file_info.file_id_);

              //为什么在写完数据之后才写日志???
              ret_code = sync_mirror_->write_sync_log(OPLOG_INSERT,close_file_info.block_id_,
                  close_file_info.file_id_);
            }
          }
 

          //primaryDS给client回写入成功消息
          if (TFS_SUCCESS == ret_code)
          {
            message->reply_message(new StatusMessage(STATUS_MESSAGE_OK));
            TBSYS_LOG(INFO, "write successful. blockid: %u, fileid: %"PRI64_PREFIX "u\n", close_file_info.block_id_,
                close_file_info.file_id_);
          }
          else
          {
            TBSYS_LOG(ERROR,
                "rep block write complete or write sync log fail, blockid: %u, fileid: %" PRI64_PREFIX "u, ret: %d",
                close_file_info.block_id_, close_file_info.file_id_,ret_code);
            message->reply_message(new StatusMessage(STATUS_MESSAGE_ERROR));
          }
        }
      }

      //非primaryDS给primaryDS发送写入操作成功的消息
      else
      {
        TBSYS_LOG(INFO, "slave write successful. blockid: %u, fileid: %"PRI64_PREFIX "u\n", close_file_info.block_id_,
            close_file_info.file_id_);
        //slave will save seqno to prevent from the conflict when this block change to master block
        BlockInfo* copyblk = message->get_block();
        if (NULL != copyblk)
        {
          blk->seq_no_ = copyblk->seq_no_;
        }


        message->reply_message(new StatusMessage(STATUS_MESSAGE_OK));
      }

      return TFS_SUCCESS;
    }

猜你喜欢

转载自blog.csdn.net/gochenguowei/article/details/81113945
TFS