大数据脚本处理(千张表,上亿级别数据)

几个重点:

1. 用文本文件记录数据处理的落地点(断点);2. 为死循环设置断开(退出)点;3. 防止 SQL 连接断开(MySQL server has gone away);4. 防止 PHP 执行超时;

5. 日志跟踪;6. 递归处理数据(业务需求);7. 正则匹配数据(业务需求),增强程序的可执行性。

<?php
/**
 * Created by PhpStorm.
 * User: Administrator
 * Date: 2017/9/8 0008
 * Time: 上午 10:52
 */

global $_G;

// Gate: the script only runs when called with ?exec=1.
// NOTE(review): a query-string flag is the only guard in front of a mass
// UPDATE over every post table -- consider restricting this to CLI or admins.
if (!isset($_GET['exec']) || $_GET['exec'] !== '1') {
    exit('Pemission denied');
}

// Resume point: only rows with pid greater than this value are processed.
// Cast to int because the value is later interpolated into a SQL WHERE clause.
// NOTE(review): the same starting pid is applied to every table processed.
$pid = isset($_GET['pid']) ? (int) $_GET['pid'] : 0;

// Collect the fid of every forum; each fid is used as a post-table suffix.
$sql = 'SELECT fid FROM ' . DB::table('forum_forum');
$resource = DB::query($sql);
$re = DB::fetch_all($resource);
$fidArr = array_unique(array_column($re, 'fid'));

// Optional ?val=<fid>: restrict the run to one table. The original stored
// this value in $fidArr before the list was built, so it was overwritten and
// never had any effect. The value is matched against the fid list, which
// prevents an arbitrary string from reaching a table name.
if (isset($_GET['val']) && is_string($_GET['val']) && $_GET['val'] !== '') {
    $requestedFid = $_GET['val'];
    $fidArr = array_values(array_filter($fidArr, function ($fid) use ($requestedFid) {
        return (string) $fid === $requestedFid;
    }));
}

// Switch the database connection.
$object = DB::object();
$object->set_config($_G['config']['db']);
$object->connect(2);

/**
 * Walks one post table in pid order, 100 rows per batch, and writes a
 * parentpid value for every row based on what pidUpdate() returns.
 *
 * After each batch a checkpoint is written via setLastStatus() and the
 * cursor moves to the last pid of the batch, so the loop ends once the
 * table is exhausted.
 *
 * @param int|string $val Table suffix; the table is DB::table('forum_post_' . $val).
 * @param int|string $pid Only rows with pid greater than this are processed.
 */
$fun = function ($val, $pid) {
    set_time_limit(0); // lift the PHP execution time limit for this long-running job
    DB::query('SET SESSION wait_timeout=7200'); // guards against "MySQL server has gone away"

    // $pid is interpolated into SQL below and may come from the query string.
    $pid = (int) $pid;

    while (true) {
        $result = DB::fetch_all(DB::query(
            'SELECT pid,authorid,parentpid,message FROM ' . DB::table('forum_post_' . $val)
            . " WHERE pid > {$pid} ORDER BY pid LIMIT 100"
        ));

        if (!$result) {
            // No rows left above the cursor: this table is done.
            runlog('pre_forum_post___finish__', $val);
            echo 'pre_forum_post_' . $val . ' __finish__';
            echo '<br />';
            break;
        }

        // Read the first and last row without removing anything from the
        // batch; array_shift() here used to drop the first row unprocessed.
        $start = reset($result);
        $end = end($result);
        $range = $start['pid'] . '-' . $end['pid'];

        // Log a scalar summary, like every other runlog() call in this file,
        // instead of the whole row array.
        // NOTE(review): the 'get-message-fail' label is kept as-is even though
        // this is the branch where rows WERE fetched.
        runlog('get-message-fail', $range);
        echo 'pre_forum_post_success_' . $val . ' __get-message-fail__' . $range;
        echo '<br />';

        foreach ($result as $r) {
            $presult = pidUpdate($val, $r);
            // pidUpdate() yields nothing usable when no post is referenced or
            // the lookup failed; store 0 in that case.
            $parentPid = ($presult && isset($presult['pid'])) ? $presult['pid'] : 0;

            $res = DB::update(
                'forum_post_' . $val,
                array('parentpid' => $parentPid),
                'pid =' . (int) $r['pid']
            );
            // NOTE(review): presumably DB::update() returns the affected-row
            // count, in which case an unchanged row is reported as "fail".
            if ($res) {
                runlog('UPDATE-parentpid-success_', $r['pid']);
                echo 'UPDATE-parentpid-success_pre_forum_post_' . $val . ' __pid__' . $r['pid'];
            } else {
                runlog('UPDATE-parentpid-fail', $r['pid']);
                echo 'UPDATE-parentpid-fail_pre_forum_post_' . $val . ' __pid__' . $r['pid'];
            }
        }

        setLastStatus($end['pid'], $start['pid'], $end['pid'], $val, $end['pid']);

        // Advance the cursor; without this the same batch is fetched forever.
        $pid = (int) $end['pid'];

        sleep(2); // pause two seconds after each batch of 100 rows
    }
};

/**
 * Looks up the post referenced by "&pid=<digits>" inside $result['message']
 * and, when that post itself has a non-zero parentpid, stores its author as
 * parentuid on $result's row and repeats the process for the referenced post.
 * Otherwise the referenced post's `count` column is incremented.
 *
 * NOTE(review): $result2 is declared static, so a recursive call overwrites
 * it. The value returned to the outermost caller is therefore the last row
 * fetched anywhere in the chain, not necessarily the post referenced directly
 * by $result. Left unchanged because the caller's parentpid depends on it --
 * confirm this is intended.
 *
 * @param int|string $val    Table suffix; the table is DB::table('forum_post_' . $val).
 * @param array      $result Post row with at least 'pid' and 'message'.
 * @param int        $depth  Internal recursion depth; callers should omit it.
 *
 * @return array|false|null Row fetched for the referenced post (see note
 *                          above), or null when the message references no post.
 */
function pidUpdate($val, $result, $depth = 0)
{
    static $result2;

    // Bound the recursion so a very long or cyclic reference chain cannot
    // exhaust the stack. The top-level call (depth 0) never hits this.
    if ($depth > 100) {
        runlog('UPDATE-parentpid-fail', $val . '---' . $result['pid']);
        return null;
    }

    // Extract the digits after "&pid=", e.g. "dpost&pid=61414914&ptid=".
    if (!preg_match('/&pid=(\d+)/', (string) $result['message'], $out)) {
        return null;
    }
    $referencedPid = $out[1]; // ASCII digits only, guaranteed by the pattern

    // Use DB::table() like the UPDATE statements below instead of a
    // hard-coded "pre_" prefix.
    $table = DB::table('forum_post_' . $val);

    // 'SILENT' keeps going when the query fails.
    $result2 = DB::fetch(DB::query(
        "SELECT pid,authorid,parentpid,message FROM `{$table}` WHERE pid = '{$referencedPid}'",
        'SILENT'
    ));

    // Loose comparison is deliberate: the column may come back as a string.
    if ($result2 && $result2['parentpid'] != 0) {
        $res = DB::update(
            'forum_post_' . $val,
            array('parentuid' => $result2['authorid']),
            'pid =' . $result['pid']
        );
        if ($res) {
            pidUpdate($val, $result2, $depth + 1);
            runlog('UPDATE-parentpid-success', $val . '---' . $result['pid']);
            echo 'UPDATE-parentpid-success' . $val . '---' . $result['pid'];
        } else {
            runlog('UPDATE-parentpid-fail', $val . '---' . $result['pid']);
            // The original echoed the "success" text on this failure path.
            echo 'UPDATE-parentpid-fail' . $val . '---' . $result['pid'];
        }
    } else {
        DB::query("UPDATE {$table} SET count=count+1 WHERE pid='{$referencedPid}'", 'SILENT');
    }

    return $result2;
}

// Run the back-fill for every table. $fun requires the starting pid as its
// second argument; calling it with one argument fails on PHP 7.1+.
foreach ($fidArr as $item) {
    $fun($item, $pid);
}

/**
 * Writes a JSON checkpoint ({"max_id", "start", "end"}) to the status file
 * returned by getLastStatusFile($_val, $_pid).
 *
 * @param mixed      $maxPid Stored under 'max_id'.
 * @param mixed      $start  Stored under 'start'.
 * @param mixed      $end    Stored under 'end'.
 * @param int|string $_val   Passed to getLastStatusFile().
 * @param int|string $_pid   Passed to getLastStatusFile().
 *
 * @return int|false Bytes written, or false when the directory could not be
 *                   created, encoding failed, or the write failed.
 */
function setLastStatus($maxPid, $start, $end, $_val, $_pid)
{
    $file = getLastStatusFile($_val, $_pid);

    // Derive the directory from the file path so the two cannot drift apart,
    // and create missing parent directories too. The second is_dir() covers
    // another process creating the directory between the check and mkdir().
    $dir = dirname($file);
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        return false;
    }

    $data = json_encode(['max_id' => $maxPid, 'start' => $start, 'end' => $end]);
    if ($data === false) {
        return false;
    }

    return file_put_contents($file, $data);
}

/**
 * Builds the checkpoint file path for a table suffix and pid.
 *
 * THINKPHP_ROOT is prepended outside the sprintf() format string, so a '%'
 * in the root path is not read as a conversion specifier.
 *
 * @param int|string $_val Formatted with %d into the file name.
 * @param int|string $_pid Formatted with %d into the file name.
 *
 * @return string THINKPHP_ROOT . 'data/tmp/_last_pre_forum_post__<val>_pid_<pid>'
 */
function getLastStatusFile($_val, $_pid)
{
    return THINKPHP_ROOT . sprintf('data/tmp/_last_pre_forum_post__%d_pid_%d', $_val, $_pid);
}


// All tables processed: release the connection opened above.
$object->close();

// Final marker for whoever is watching the output.
print 'success!';

猜你喜欢

转载自blog.csdn.net/Baron0071/article/details/83213458