hive_hbase一个综合练习题目总共包括以下部分

1.数据的预处理阶段
2.数据的入库操作阶段
3.数据的分析阶段
4.数据保存到数据库阶段
5.数据的查询显示阶段
给出数据格式表和数据示例,请先阅读数据说明,再做相应题目。
提供文本内容:
video.txt

y5kwKp6y8t4:TheReceptionist:713:Entertainment:204:2137:4.61:70:64:PF_ZMlw4rHs:c3XKOAxKc_w:yr73064QrGE:atfNL0_KAcs:kzwa8NBlUeo:qN4YDGysDEI:Z8SMY2jyAYw:sHZwE5GdEdI:xsoIcvgVzbU:CLIEkU0XHO0:aAgZ2Xw6qBc:nWyAgNCHk58:x-fPtq5eQ3o:94yk8QzmA28:c9Ztjs_EXWY:bs-L0XwiHg0:BpbvBwOeCEU:vxBzBimfAYs:5wn4farR4ws:MVcmftKCce8
oHknurFKx64:TheReceptionist:616:Entertainment:53:11056:3.83:81:47:ynapticChaos:571:Entertainment:305:11182:3.18:83:DE:ffv9Ol-A6KE:IbRsNdtkQg0:kLj3Q5g7-Ag:gku2wNI0Akc:31MCXmVoyl0:9WP9aq0B1QE:6ohKngEyUmY:YmWphrOsyfw:Gnbls__5gdo:rUCiO7c4zLE:hfrlPgysH7Q:Cngay6q2DWU:bs-L0XwiHg0:OPmYbP0F4Zw:y5kwKp6y8t4:LpCCsPergb4
LpCCsPergb4:sneakypetefootvids:688:People & Blogs:88:17449:4.52:21:3:mCkCcEsLBQc:h_HFYtLUQUU:T7HTpO76gjk:Bji0d-PCxok:rkpEXrgASFQ:YzUL8mO-1KY:32Nu8cFC0mc:evuk2-zAZmQ:wpo_eXIH2Tg:Qdzvsu8O_kw:dKiwfxmPx_A:k5Qj78aXNUA:dqjEBAjHMG4:cTIZc8AFfgY:-Z2lTcrZBRE:jLoMRNTkYTA:ZIXTmNfGnOE:oeXwFmyMoNs:EyYGB5VktxQ
qF2v6mW9J2k:jresch:632:Comedy:192:367:4.25:4:0:y5kwKp6y8t4:oHknurFKx64:LpCCsPergb4:vcVYexixsmA:LZi2ryWsShY:iO8qKGBL9vY:sV0E-cRjVSQ:OPmYbP0F4Zw:zdnuTg9K5XU:UnfbKKvUG9Q:t7Y8yKNGEo8:4XQETEbMaek:Yph6sRK0vog:a3_boc9Z_Pc:wDQUKSLEEvE:GQbGA_QSXDU:qR8WRLrO2aQ:goQu8PksNn0:uG9bopFBOnI:Pdyy_kizNRo
vcVYexixsmA:BudSuperBowlXLI:717:Comedy:30:809324:3.77:1092:141:4JXPZapLVVw:TfG36T7Ms74:gfayO0hBWc8:XETLf3u43uk:4uq3yjKEXIQ:SwaTC7qFwaI:KA3YSggFzEs:Ys4UrcI11j4:X_T9GeFTQI0:YqCJJgRXFG0:irsWoUpBvCY:Ryt3jCEiOZA:8Cz_HEh6L3k:lFH9hN3K-Js:dmYYTownZEQ:k93suUhwfXw:Zr2XKAEI0oQ:DTg6n5EV6jI:QnoLALdjZBU:lucha0X54js
LZi2ryWsShY:lovejoy71:433:People & Blogs:111:47234:4.94:65:32:9G3rVGW4JrI:UnfbKKvUG9Q:753jCzdr_4w:QwNb2WZu8hE:0KyD0ZA2RRY:T6_91j86v5I:yJDPn0sPgus:uz50jqNcHRw:cFQUvZD8X0w:kHkdIiadj7E:Y0cHBgzhc6k:ioyQi-rb1DM:ncOP-9pZD7c:FThqh3xmcfw:CuToVngYyzc:ZkR9jFGFijo:bqAMoOufevw:_sf_0ICtCDQ:b2L8Y9AIgBE:OnEMs6jlRfo
iO8qKGBL9vY:HIFF26:617:Entertainment:526:180:1:1:0:QpHksBZc4C0:O89TZyAgruU:lqmZuINZlGM:wwISnUYV_4I:B4hct7s771M:dS6YbO38SO4:mJDbO7FYEYI:j86MheG1hoc:m05upWuXzJA:1DksPoLl6xs:5f_65oUbAu4:_YoZIjn4H04:Eesk6IkFBAI:R-ay5yI1-9Q:AB5ijurIVPU:k87NTxFSpN8:wbMGPz8Xc0E:_h2hOWpXaLc:gbCBse4qRXE:fxXWr9jshek
sV0E-cRjVSQ:Yuki869:504:People & Blogs:406:33738:4.7:43:18:mcASJTduDlw:78ODc7joJhg:k2c8w5tjsWc:FZp6nyqMGUM:QNPZ0N04-og:qXAaERns9FM:cIcYeBigR2Y:x0jz-vIjHfo:JqG3xz5JVF0:s75IPvXJ3h0:84xLLsaPZ_Y:DOo-piTHYic:04tubbLqABw:otPIYLhOszg:WUGhrCaRyrM:Mky19Fa2mGg:kAZlqE_wsnM:-GOaECf3MrM:gyaC2ZKkCZA:y3fTPXDH6NA
OPmYbP0F4Zw:rockwoodcomic:561:Music:183:892595:4.71:1517:699:_Jglk7i8do8:gnmqIUjyWsw:SjFetumImrE:aTujd1yNyMw:lpS2nSISQqg:OMWl8KVqErM:59302FgITAM:Yph6sRK0vog:cKYI39xXgdI:UUgKkZOiW2M:G4P0W3OB-XE:qsPUR6XPH6Q:B52z7Zfjo1Q:g6bwmcIzxCg:3nie9lKPifg:b2azrZAqIOw:-F9WkqwliLE:-ZhfkPeBZKw:2hzEdRUCvGc:ApE0A2qORAE
zdnuTg9K5XU:punahou90:418:Sports:201:38297:4.92:86:51:0zbpY3ny2TA:OIh4j1vYvCk:sjE8FnPNgEg:9C5HLScbMYc:eszsw-JFSV0:9gDZj7d7f98:Ocm3Ux2SbGk:y84O9ktMk1k:6FeR-J6MY-4:m5VNainRJkw:MbryO9LREMs:YgNm1pvzd6k:RBQLxRVYdDQ:JHkL5R7L37c:zYbrA6S8Wt4:Zv0_EpF5SyY:NRzQmx3CbgQ:D5OeyjeenEY:6zEXgLa3oZE:GPdkhW6JbkE
UnfbKKvUG9Q:gerik308:573:Sports:26:12897:4.47:17:25:LZi2ryWsShY:vcVYexixsmA:xOeFiII9mSE:iO8qKGBL9vY:sV0E-cRjVSQ:OO8Ti1wVpyM:zdnuTg9K5XU:OPmYbP0F4Zw:GzDvbQtqyOs:t7Y8yKNGEo8:vdkmhquF60o:WQGOEXwutDY:s7Ne0r-rtlk:AgoQ6PDxihY:R0PMtuoQDoI:JA_1zHzvUV4:wDQUKSLEEvE:4O4zABjzlBU:a3_boc9Z_Pc:HZRI5Y-wVz0
t7Y8yKNGEo8:philleif:428:Entertainment:90:2480:5:1:0:QLz6U1FqviM:n5UzEHyI23g:7GdKknxne7U:bQlutHdrfPk:GGIN5oODrok:gtvY5--vJUs:oClDea0v3mM:lifW3jFwh5U:oypDPHKGsN8:MTuiMCEuDNA:oYpJV-b6jzY:J3738gNvsNE:Y0gZsR-8IMU:JbGijCzDGrk:Uaqmjmt8eD4:EwMolP10XM0:xqarrTPT_No:GvPDFUvIJGY:sOZPQSzVCbE:JfETohgFDsE
Yph6sRK0vog:DamitaJo79:716:Comedy:183:1956:4.67:6:2:OPmYbP0F4Zw:SjFetumImrE:3nie9lKPifg:_Jglk7i8do8:gnmqIUjyWsw:mTjmV1nmB2w:B52z7Zfjo1Q:zpx9hb4PnkA:pWyr7xnTu6c:jdTLO9RJp08:EMCLsRCjKK4:ks0qN2cTxIY:9PXSmhNC71E:QNNCNde31uM:FQgv-xqXQWg:8vWRamcNj_Q:un3-Hb9wF9s:2TubTqoLdd0:K5EhXGbnkPk:H_L8Fdo6UMQ
wDQUKSLEEvE:Gentleman1911:522:Music:285:5140:4.56:9:8:BsPZQ8xIaDA:kHjaBjgzopU:nTeNqGuOmQA:SU_6wxdZPr0:eY7d7zlPB3Y:isU-jUvdyYM:7jk0SZeFJ6w:Q_wRIkZUkI4:f7zMJ0G563I:pK0uAhU_yWE:9W69QslMNyU:wCeNdFul4dY:UgvTkfhOhLU:hKkACP7CZyI:i40L1nqNrz4:PLKUij_NhS8:AnjHy8iEzfs:KxFOG-JA9FU:OgLvCVL9jyY:I0dXWy6mFaM
GQbGA_QSXDU:thairakthai:453:People & Blogs:191:87:0:0:0
qR8WRLrO2aQ:mienge:406:People & Blogs:599:2788:5:1:0:4UUEKhr6vfA:zvDPXgPiiWI:TxP1eXHJQ2Q:k5Kb1K0zVxU:hLP_mJIMNFg:tzNRSSTGF4o:BrUGfqJANn8:OVIc-mNxqHc:gdxtKvNiYXc:bHZRZ-1A-qk:GUJdU6uHyzU:eyZOjktUb5M:Dv15_9gnM2A:lMQydgG1N2k:U0gZppW_-2Y:dUVU6xpMc6Y:ApA6VEYI8zQ:a3_boc9Z_Pc:N1z4tYob0hM:2UJkU2neoBs
goQu8PksNn0:dhanji:602:People & Blogs:260:7305:4.55:11:8:1MDV6fM0Mu8:nQXV0h1kOKc:2PFMouZoKGw:VcDeGnAnNSU:BKIOOCdH3LY:XYxAjBCc7d8:I8ERtnWRwzA:hVRG3vrjNxc:SKKqU1uSDmU:gJm_UB3hMJg:QABQAjgqfVM:vTPKSsjleGs:_OXDCRSfO5M:ictjN0buiKg:KXq4j2aStGw:BL6hcqdRzs8:LS-xTWSgb1Y:-B8UWZ6xb1o:u8e35tybCTo:JFjwqnakmGk
0vdG-FwpulQ:TheReceptionist:650:Entertainment:187:199523:4.34:1249:563:Nds16trjIGo:O9TQb3mBDME:lWuzJpwYg0s:_th07YE0cmQ:v5F9RtVqP4Q:h9fNKnZqq0M:lXC7ZBxaahU:xvuBkzJoAWE:AeyCBknKYBg:UErAYoDHsks:k8aXCcPcDE4:PBgIPe6fb68:s46FreUlR3c:y5kwKp6y8t4:oHknurFKx64:LpCCsPergb4:qF2v6mW9J2k:vcVYexixsmA:LZi2ryWsShY:iO8qKGBL9vY
luWqWuBIQ50:TheReceptionist:664:Entertainment:291:4015:4.83:161:169:AeQiei1jsJc:kwEDEwGRGFU:QpSAvF46yVM:euk7-4kbxR4:HWk2idcA1yA:yJyqJW7BNe0:oiz5ExZSL0U:tJSdM2H80VY:MAnIgp0bYB4:tjgaJvgKVvU:ZJzEAcUlbHY:CLBgdaB-GFE:Q8RFCcbUbiE:kmkE9NX3SQk:34rYOwpXqaA:3Ja4qSj5EWM:sMr_70VPmmA:OgwS_XxHl_A:nWXg0kD_DeE:FlC7uxL6ytE
Nds16trjIGo:rontv:618:Entertainment:97:3777:2.41:27:28:0vdG-FwpulQ:O9TQb3mBDME:lWuzJpwYg0s:_th07YE0cmQ:v5F9RtVqP4Q:h9fNKnZqq0M:lXC7ZBxaahU:xvuBkzJoAWE:AeyCBknKYBg:UErAYoDHsks:k8aXCcPcDE4:PBgIPe6fb68:s46FreUlR3c:y5kwKp6y8t4:oHknurFKx64:LpCCsPergb4:qF2v6mW9J2k:vcVYexixsmA:LZi2ryWsShY:iO8qKGBL9vY
_th07YE0cmQ:ODonCody:655:Entertainment:38:3641:1.7:40:99:0vdG-FwpulQ:Nds16trjIGo:O9TQb3mBDME:lWuzJpwYg0s:v5F9RtVqP4Q:h9fNKnZqq0M:lXC7ZBxaahU:xvuBkzJoAWE:AeyCBknKYBg:UErAYoDHsks:k8aXCcPcDE4:PBgIPe6fb68:s46FreUlR3c:y5kwKp6y8t4:oHknurFKx64:LpCCsPergb4:qF2v6mW9J2k:vcVYexixsmA:LZi2ryWsShY:iO8qKGBL9vY
v5F9RtVqP4Q:Piety:655:Entertainment:4:3482:3.04:26:31:0vdG-FwpulQ:Nds16trjIGo:O9TQb3mBDME:lWuzJpwYg0s:_th07YE0cmQ:h9fNKnZqq0M:lXC7ZBxaahU:xvuBkzJoAWE:AeyCBknKYBg:UErAYoDHsks:k8aXCcPcDE4:PBgIPe6fb68:s46FreUlR3c:y5kwKp6y8t4:oHknurFKx64:LpCCsPergb4:qF2v6mW9J2k:vcVYexixsmA:LZi2ryWsShY:iO8qKGBL9vY
O9TQb3mBDME:lnelson:550:Entertainment:80:2786:4:6:9:y-mxGyedX1Y:-wcK8GhPh-I:UKeDWCLajQk:ynxkMjeo1jA:PmtP5AzppO4:F__Y1HMZ9eo:-xuugq7fito:x68vJxc0pAM:GscpuDS1sLU:8uwuLxrv8jY:vWj2EBCIvlQ:pBF5AiIg_ms:gJFJ8WGhIk0:Cl6VWVUqK7k:xE_1u-5uPEE:m8YdFkVuthI:P4A4oo6_xhk:AldaLWwbxlw:XKr1Ou8_T3E:o_F2xwlrT_8
ati2fDMaggA:clicheba:648:Comedy:49:248:3:2:0:olBoTntMiGk:kkfyRv9bpZk:CY-6uy-TeQo:pUDmUPZzvYI:0B13sb3IE34:jSI1Bt_lM6c:zwcj_h03jYc:3869FWMuOUU:LzX2RmOiIw8:EhQlIxeow2o:PVzYDCNngJ0:fmq22KZy8Po:jWj3nJqsjdA:uS3oIVzALvU:EdTfmQVNBSA:vJ_HY09_s1Y:T8bss67s5OY:GRxIJs0NHqI:5ZRd8dlawYU:toBchVIOmmQ
e8p2ecFw8Qk:stavi7:618:Comedy:402:203:5:3:2:wxu5EUpsANY:HZSfe16yVCE:NXnp2Yjq2ds:1dqrasSiLmE:YOiLhLkXvWo:7k2JPyNsYUQ:JKD_8Fr4kJE:DYr4EnGEC5w:8PERPElI2aQ:UgywpTbxVCI:OTSUfR0wfzg:5owCa_69WVY:giakZxNqmKM:bFdOLbN9FZM:Q-uQz7C2Ci8:CynPkbZayRw:P-sAtJPYhI8:4bBqrHvZwUU:B_W5LU4v_RU:OfhJ2t0nFkA
lWuzJpwYg0s:chrizzz:543:Comedy:42:2721:3.45:11:11:-dcmDscwEcI:9Z57W0g1TRo:EdNnEwk6bz8:m4ac5STw97I:vNW0Ysoauqg:OJeA_QnJ22M:wZ_PLX27eqE:oF0xz2HvA8c:OcIWgDSmH04:3R8_wUaziqc:FEZeoHwbkh4:ga6zAEB9fOM:U5IkWLj2_XQ:Z6msZ31Dgn4:0t7ldKoX4LA:_m8AzP-S-ls:cTY4Yo2SR2o:PgM11RtGjeI:k6BzyN5p4aM:JguvDy0kxPM
dtHCZV0Cgdo:hannahsophilotti:721:Comedy:77:348:0:0:1:y5kwKp6y8t4:oHknurFKx64:dN1ozA41TXQ:qF2v6mW9J2k:LpCCsPergb4:iVFIJScazyo:X_OcnDjpbUA:XZ-2nY6AwDU:gThFIdP0xfw:3ZZTNxsUPpU:zQ10GdzzhLg:Kvgb-ovPpyY:TRf752D-XxU:D3JUGBqi-rc:wvcjy4qACPw:jSkYKglRbTM:cPBZvrw82Nc:cvdu1TLhnRA:gVt9T-raEI0:x6ejXBgR3eg
IM8cEafvq0I:TheReceptionist:727:Entertainment:143:2678:4.7:137:101:aD6C0Sdahw8:2cU31i0zhq8:oCROXcaGIGQ:aK5hYowLfvI:l5jTZOJ5W18:cq0AJ-OJpt8:tkXpCvoEidI:pTVyB9ZGtxo:dHol4ICeDoo:TshWB77-7M4:i9NdhSbtTDc:7oVvkNp4GdA:U0DTgnOar6k:uBFiVtzHsa4:yQrPVsr17g4:mEHwkWyWD0Q:9isMwsQiQrg:rX0vOYwHj30:l3zUOvKl5ZA:IUTkWIx6xu0
wxu5EUpsANY:crankstrap:343:Music:218:914:4.46:13:4:G6mkLG7gHL0:W6bN4IQIZ_M:YgDeSnIgTJ8:l5Qx4u-Uua8:jWFV5PF27Bk:584Ui5-HjIg:G7TXDc0WwAA:kPNe2QKTTK8:ZJZvMpZwOrM:QNcugwSDEf8:AdRQ7tc08Yk:bemhCetpdNM:LnqHGjRkurQ:jWZWeajvEAU:mkur_lnqMMU:MFOwW7s9ukk:8FcY9yhse7c:5v7LjtK22fU:v1Qd5LGUO70:a8Kkgm-ReZ4
OZvAelcOHY4:elanegica:637:Comedy:104:205:0:0:0:ZCU7O58RKxQ:z3Js6yO64No:LKh7zAJ4nwo:DrWD3It9xik:5m-7O_FMCL4:bryp4N2LCmk:8ZETgiZV7GE:aLpYTe3ulS8:WPcHrYMvcEY:jfFNA38bv78:GRxIJs0NHqI:yPc65cYtFjI:-xewAHNEfRg:hE2icO6N7Vo:vRT41HMRw18:RojBCLlrZBo:KEnH5GytOB8:DfXAAH6ZN3A:n0jeJvmWQ1A:J2EJxedzCHw
DfXAAH6ZN3A:aaandooo:694:Film & Animation:33:1449:4.06:31:25:uElTCB5bCfw:XPNz_FB7rM8:EGlNsWi8CB8:_pA-gR02Vok:DP1N9eOut-E:DtnZuqj60wM:gUKzSAea89A:eagbuLoaoPg:Qa0AkfO49WY:_Pch_77AX08:KlJ3pS8wyvA:50NVDH3_d5A:tHLFlEUZ508:-6LRdXRvNrU:EpuQcrjzVAM:fYKYXumNybg:1XA4qcq_XH0:yhuIOPjKnj0:dtVDb5BInRI:QYp7o_iWuDY
fYKYXumNybg:salvothasock: 0 : UNA :62:2065:2.88:8:5
-jvF6B3gVnw:Tharavol:191:Film & Animation:245:451:5:2:7:2GsF0JHejTs:AdR9axQq8mo:ZmySgw0eL6Q:A-DYgWR_vhs:4op9TYRP1yI:oF38MIDM1MY:AbrAk_lChC4:yCg1tW-ooQ0:gnFituPeZJ8:X5i-IjX0YfM:73mTp4Ns-7M:69jGxkHMwOs:OtQ55QSWLoM:vOrbbKKXJOU:obLKpKq6gfs:XD3rThR3sMM:fBXeHRCrXQc:CU0WI7v2WBo:oyYN-rkCUzc:SlXulu7-kms
962do71Wez4:peety:367:Comedy:202:1239:4.5:2:3:jpLJpWkQQsE:Rpg29kDpHvM:95U52TQ4OeQ:74XCbvATdqk:H-RomskCJp0:nO2eu4FRsdo:FXDreNVpJqI:euUJ9K8bdvQ:EtT6g9oTDDk:4h8reMQBCXE:jd9JnERqUR4:uOWuwQS_xoY:rWSR8eS4xkc:KYods5EcKww:NwqnJ9ORceo:UfZ0AH3vmgs:hONFFU6tIjc:9erEY5aBVmE:YL6njQVImGA:Y_4R7zHvfkU
q0891fGHlSk:evman44152:382:Entertainment:179:1596:4.54:74:4:9Lb2jzmLsLE:WU0H-_2QJ-0:WJ3D-3WsK3U:z1lQ3SZzIqQ:cpEsNrYEssw:LlJf2CvUSBg:_pgvRSP3srg:kYzInXv19sw:4Kve1oGPjf8:0V6bOB8ypk4:inDkU6Cbpas:sYyDuoKA1V4:TIY-QL7026E:uzX33IwmJhw:9gSCJ0ZQt08:kaZreAVJMsA:8pE9uwYqbQs:3rMsbeucW6Y:NjI_llVWjNQ:r54wZISDXLs
iwQBxtUL7B8:vivianyuww:517:Entertainment:529:33201:4.87:70:8:LGVU5DsezE0:ug7jDiXxIy8:nJ9TMziTj10:Q007Rt8okfE:bWzIFgi7KF0:wlpC8MauJJU:GV7nxm9ALeo:Oq0PcJn-cOQ:GLpXJurv_hY:NzVm6tfYwWc:uMXxN3KNV38:bOiRnPrn2us:arwxVJsy9Gc:p1k_Sp1dOYk:nOHMLE1z24o:ZpEK2j4GR1c:VqdefaDQKsg:QAxLRzqOTcs:5TqLU7-CLuc:1d7-8Bpr68s
s_w38Dp15rs:Glutton18:668:Entertainment:361:5932:4.76:21:13:TbsBNM6uS_4:bQm5-IRe6uY:sqH9jlyV0Fw:1b8vbZmwyEc:t8P1kxMgqSU:j3EyFriMlnc:hE2icO6N7Vo:FfaQNTkRtTs:xV-vGw3Vi0g:pWie49zaqvo:zIG8RJRR_mE:-JKlYiWx1ng:j0eLPPGSpcI:cev7uzyZJn0:Nl2TXuqWfh8:hkEr_S6lu9w:GfNfHwj50eE:YZ-h7LjlStA:PVzYDCNngJ0:JO4yn10Ttjw
xu1dkd2M2ns:buegrasso:506:Sports:152:77582:4.81:259:101:95Lgm_7VeQ8:WeTLFOPmb3s:g7_2qI-VQYM:q2DJfaKGkzs:ouEQTA5mkCA:mwBDHxT9tEo:Bo_iB87X3sc:HYg-MuF7eRY:-N-rKfpfZkc:sNgV02F3VjQ:k_KnXgpqol8:oXoY49G-0Jw:pPHY4PdEKPI:-IvzunIBjQU:uzGRdNm_RA0:6L50gZn4Iy8:K3hAywhn9as:67kT7sO3IpQ:zY5Z4rGjk0A:QN0-VOcH4WU
vzI4uX0DnXo:fcm86:507:Sports:627:40290:4.95:170:27:sNu8ekOGzZg:CgONophDcEY:n3j7HKcBJYo:BCeJLdB9mYU:4pGEeGLJs6Y:35n99iq5LSg:APeL1Uk1bFk:q9gPIw_C5TI:u4l1NA0AS7k:OjwYJC2Glb4:Uw9ch_Zi7dQ:jsXkxBshZnU:QlrGkqAoswA:oZv4RkrafkQ:ndj1k1IvaA8:i7SohlZzJS0:RMUbOyD9QbI:3bMOv3yptDU:h4kYiCDUU04:-FQzDcacXGw
d2im3hBdnWY:magalino:505:Sports:114:8517:4.95:19:6:Z9pciMKE-WA:psOdP_h1zds:xu1dkd2M2ns:xnq4ZpTHOIs:WeTLFOPmb3s:6hF6Ah166XQ:K3hAywhn9as:m-BHQf09MNY:7ZMbiXPrzss:a0OeE-z1x5Y:KmQMfplY4cA:noitpoeFLtY:E0wS0TXN7y4:SK77SDR8_9E:6cVIvUzxDtQ:oZv4RkrafkQ:nmk2aRuPtaM:bSEQyX5PifY:ZGaztHkaEUo:VlaVB0zjZk0
jhe9uxI_zJM:lorenzo1210:632:Sports:484:14757:4.89:44:16:KmQMfplY4cA:kjoUqaPDDPs:wP8UzFySZ78:GUQ9tXGrrHk:5reztLBMxoI:7dcyl-4l9es:9ZmHpHvEgTo:qeEcWoMPeCI:4pGEeGLJs6Y:Zw1unWCiPoA:U59IW2aZZ5g:Xl-tw6x1r9c:UFP9bxH9GvA:4S9HUdhJO90:VCT8pQLp-fA:qPJjv5Sk8Vo:F7cnngyuw3E:MBaDfbHShM8:VU_dL7qfcVE:x8SG8r4Mg8A
K3hAywhn9as:Lovejoy83:505:Sports:113:11298:4.85:20:0:vzI4uX0DnXo:HrE_cQRz4Jc:xu1dkd2M2ns:pPHY4PdEKPI:USKLWrE2WJE:ouEQTA5mkCA:noadhi2gfAA:WeTLFOPmb3s:d2im3hBdnWY:bHYghiaAITU:tXNmbLCTb7Q:-FQzDcacXGw:0aUSLreLYR4:n4_j9M-pDHM:_5tWBmpmo1U:pstyKyGib3M:bSEQyX5PifY:WxbVqRMeBPg:i7SohlZzJS0:Nskg7_U1wsE
noitpoeFLtY:ilvermont:512:Sports:112:1433:5:1:0:0cwLgvB4-XM:tZpugRZEuBc:VW75521q3WI:4pGEeGLJs6Y:tXNmbLCTb7Q:h7dsjMQ_qi0:EtnksG66B0g:Lmnqa-Zr52s:Ps7yv1s4CHc:VoARfHfX2Eg:UTAiexqLvAo:W03QHA8omI8:EjxJCU_gKWc:PyHp5qbtkKM:q4Y90NvQKFU:V3hSkt7O9SY:qg70v-zXtDM:zeNJjgAm5L8:_yIdvdvB7zU:w9dipHwjr1E
mq1l5HYpysw:90Diego90:624:Sports:114:3236:5:9:1:g7_2qI-VQYM:JRG0cQPQuL0:95Lgm_7VeQ8:6L50gZn4Iy8:0Dqe3Aodqq0:xu1dkd2M2ns:67kT7sO3IpQ:noadhi2gfAA:stIfPzLFk3k:K3hAywhn9as:p-nSGO48a8U:CQXQAnt_BKQ:ghufdOHvSfM:rwppvm5GZhc:oZv4RkrafkQ:AHoVOvPJw9A:q2DJfaKGkzs:la4xKzK_ISA:9Wu0pzl4uys:4pgvUTQVj7Q
n4_j9M-pDHM:algagna:505:Sports:114:6867:5:10:6:bjt1jiQzfKc:g7_2qI-VQYM:vzI4uX0DnXo:95Lgm_7VeQ8:mq1l5HYpysw:d6FqhlDeoAc:JRG0cQPQuL0:zuQyuacY2kc:RMUbOyD9QbI:HrE_cQRz4Jc:h4kYiCDUU04:rtADJoNqGas:AHoVOvPJw9A:YUt1uhwlc8Q:H2WA8GvFmLU:oZv4RkrafkQ:XYcNq2wXG9c:q2DJfaKGkzs:abKZrW3xGFE:9Wu0pzl4uys
3WvlmW2fVxE:souldamn:505:Sports:114:2920:3.29:7:2:hKymnucRm8o:00ohsn2yRPI:vzI4uX0DnXo:95Lgm_7VeQ8:oZv4RkrafkQ:aLJVwH5Cl-c:WyH_qf2irOY:JRG0cQPQuL0:SygOO9mVxNU:pstyKyGib3M:4G_o3Tw55ks:sTW48xpk9cM:zuQyuacY2kc:hktrpgdJTjA:DH56yrIO5nI:stIfPzLFk3k:KdyEqPzEyOU:RMUbOyD9QbI:ZnJIYnB7AlI:tXNmbLCTb7Q
qeEcWoMPeCI:lorenzogiudici:636:Sports:231:2837:4.33:6:1:yrK53vAq03E:xnq4ZpTHOIs:sFwoHIWDaRE:VU_dL7qfcVE:7dcyl-4l9es:y6OZ058_4v8:5reztLBMxoI:Ce5Jy425RpI:q9gPIw_C5TI:Min112O1pRA:_-D2mqyenxQ:7e0rIiASFq4:-3dTgWQU-dA:LFyqVUfIVSI:PZ4A4SqfW5E:8Xl59PF2WbU:T3y5Ft9Xgdg:kDONvfdcBT4:hVhKtmMHuA4:omqKnNpcrUA
QN0-VOcH4WU:Marchino78:505:Sports:114:7296:4.52:25:3
XVDznwe0p-w:giorgiocollegno:507:Sports:114:1349:5:2:0:LB4rxmsZV6k:wvY6sf-3wtY:xa14L63LpyI:Pwf9Dw6ly4Q:-G1clSlOVCw:LD4kffVNlo0:obvjW-y6S08:j8oVGAqs4ug:QzQdDSWFPyQ:w8DDxvnAlys:0jKetHm94Go:ZQpfkboeQ2I:9az4wx2MYj8:3FpXVf2rcrE:lzVkb3HT87o:ATGCv3ANakw:MpectOVn4nI:TP69XUSxdOU:Ek_5SBTQD14:JPk5Wik828Q
Min112O1pRA:Davidozzo67:605:Sports:80:1950:5:8:8:Imwg6q-eLF0:mrvWjuf4PAE:1Gnxy8p3TBs:4SGLxid90qI:PMXFwp3AJRI:q5eGlidHo4s:RQpQXRpNgdc:DAeyC_7Ok-o:ShFHNBocJgw:r83JZuviYFw:IDaPdMSyb9Q:ODeqMr4DiFs:SH-xatdNSlc:3yZLtOD2XhQ:FsA73doXh3g:dpUIgaA54x4:nQ_PqNRO8Nk:44CTxI6itPo:IZMvHE0DtD4:B4nDW4woEpc
4pGEeGLJs6Y:ottantuno:511:Sports:431:37465:4.53:60:46:kDONvfdcBT4:JCBrt9WsHTk:-3dTgWQU-dA:vwb3KaWfofE:9pTSGia2ZqI:r2xjw-bCNQo:X5BKShdJ3v0:ngK6oVLzVlM:c9sfQ0v-EZU:fBaXvq2MA8A:c5mAQ0noG28:dRoY1OYH1vE:Ii9sen-szdQ:U3vpSDoynO0:SmmMO9oV5kM:8t-MEnLvMEw:DETr2XTHtOA:iIWZRcouhc8:kHoDsqGHfbc
z2k2lGIjJII:Tiberio1982:512:Sports:222:10426:4.68:41:16:qeEcWoMPeCI:w2BBb2Ym1vw:8eCWbkWHSM0:zmb-jAbdF5o:d2im3hBdnWY:yrK53vAq03E:xu1dkd2M2ns:jhe9uxI_zJM:vzI4uX0DnXo:y6OZ058_4v8:DH56yrIO5nI:Min112O1pRA:K3hAywhn9as:RCbIf5isLFI:fSvjItjF4Nk:noitpoeFLtY:Vi4IwQNsuWw:UNubJdIFcms:3WvlmW2fVxE:asa72b3zuN4
FYR5DD4Q90U:MICHELEROMANINI:596:Entertainment:26:530:0:0:0:d2im3hBdnWY:vzI4uX0DnXo:xu1dkd2M2ns:jhe9uxI_zJM:noitpoeFLtY:qeEcWoMPeCI:K3hAywhn9as:4pGEeGLJs6Y:n4_j9M-pDHM:Min112O1pRA:Oelej-Vw1Ac:w2BBb2Ym1vw:OQsE6EG658g:yR6-IXUIx4w:HNRsEWNoh9I:hx-sgSZmkpc:gltb2nwSHAc:KmQMfplY4cA:sFwoHIWDaRE:qIfXxmQIvX0
Oelej-Vw1Ac:paiolaste:621:Sports:37:865:5:2:0:JzqumbhfxRo:LSE-Ia-2z2g:yrK53vAq03E:b_3Hd85yF8s:q2DJfaKGkzs:KmLHqswITrA:76_CYmhjzDE:re4hKCus9Mg:70koR8XE3ck:KOxRr608my0:cqCp_elrUco:6Uqz93EKViU:u9lTlFtLIEs:og76tuyBwJ8:gaBPRQTS7hE:kbjR_EbHPYk:6H5MUNZuV4A:aUAb64fjqHk:jYYKBovM5DQ:MuCcfFCM638
w2BBb2Ym1vw:ropulos:507:Sports:28:5109:4.67:12:13:Bwzttm50Db0:No47p-2lchI:VrKrb90Jy2U:EGa37z-Qya0:xZdBS2XDtlM:JanTHvVeyrE:iz59jM6Qk60:UX2EsprLKOg:gTpkZx5ZRS4:nvzJ7PwFsOM:BuGSVyG_U6c:MyMIJd-2cnc:QWcZMCJtpBg:j1FAGL2zjF8:28JAxWKsHB0:YdmAKQUwibc:0Go1zA070nw:ZN_5T1OMTag:28Zfo0abfdY:iPuTRnybN2M
8EyU-HLAPhk:giosco:505:Sports:167:39785:4.98:54:33:8x5YeJCCutY:KdyEqPzEyOU:hKymnucRm8o:q2DJfaKGkzs:NhuGQDM6fZA:USwC7qg7gnY:sFwoHIWDaRE:fSersCUuVlU:sn0I0F0d1TQ:wCMgX4PsI9k:lXve-zATYuY:2HuDw9dHmo0:cfRba2WHyFk:WL-LtkKDZK4:1m7kW2osFdo:Gzp5f26mhCQ:8fhCi33RcxU:bVSkZzKPp-k:JlPBdJFLr5U:LuZOSUhjZ1Q
h8tCgrO2RP0:danielmc81:713:Sports:16:65:0:0:0:yrK53vAq03E:kjoUqaPDDPs:4LqRfXVcRic:8eCWbkWHSM0:Oelej-Vw1Ac:Min112O1pRA:i6T8W2atlBc:4pGEeGLJs6Y:LW1nUGikzfE:-3dTgWQU-dA:XoB0cw1dKi4:9ZmHpHvEgTo:SolmekCnxEw:Q4GfGKElJnw:7dcyl-4l9es:5reztLBMxoI:8kYFFHwP0bs:jxx4A2JGHaU:hPNvOErgOsE:de06DoV6ZU8
JyU3mJAr-Tc:AGStudios:451:Music:218:10535:4.46:35:25:jfFNA38bv78:W1Uo5DQTtzc:RojBCLlrZBo:fkPKXkloTX4:7hOyO2KVd_I:xlxmwvWekZA:_uE7KlbWEQc:fD0n-eXUH4g:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:CTAnNINWNqw:0LKZbvTRNVI:Z37h1mNQQ3U:vsF95bYZcB4:x2sXC0s2iqU:tHLFlEUZ508:Cw7t5eHVlc4
jfFNA38bv78:hungerartists:624:Entertainment:120:5757:4.79:14:11:JyU3mJAr-Tc:W1Uo5DQTtzc:RojBCLlrZBo:fkPKXkloTX4:7hOyO2KVd_I:xlxmwvWekZA:_uE7KlbWEQc:fD0n-eXUH4g:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:vL7FcvEydqg:CTAnNINWNqw:0LKZbvTRNVI:Z37h1mNQQ3U:8vzqspFxNe4:vsF95bYZcB4:x2sXC0s2iqU
RojBCLlrZBo:hungerartists:655:Entertainment:115:7332:4.83:6:2:JyU3mJAr-Tc:jfFNA38bv78:W1Uo5DQTtzc:fkPKXkloTX4:7hOyO2KVd_I:xlxmwvWekZA:_uE7KlbWEQc:fD0n-eXUH4g:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:vL7FcvEydqg:CTAnNINWNqw:0LKZbvTRNVI:Z37h1mNQQ3U:8vzqspFxNe4:vsF95bYZcB4:x2sXC0s2iqU
fkPKXkloTX4:ass3aol:628:People & Blogs:83:1685:3.92:13:16:7a3RQNq-4Bk:fxoVYFOfM2g:jQ-zkvou8Bw:SUByjvZ-azo:cAWa3bpPFDo:9yvaWboV46s:CC4hsCQzt0o:neq5LLAazHM:Nonxo-Lwk1k:mo2krxeO9Fk:kkDKV67a6Qs:askojKP7wsU:lvQMx1sOHvw:sdTldR17Zms:dZwGuYr0gyg:RUGCbaKoo-k:n4bowzUR7TA:m10jYfSjYEU:aDRIUHw30JE:QxAVk0jYtx8
7hOyO2KVd_I:manoplaman:375:Entertainment:20:4073:5:15:15:JyU3mJAr-Tc:vsF95bYZcB4:fD0n-eXUH4g:Z37h1mNQQ3U:e3jiIW1wl_g:xlxmwvWekZA:Cw7t5eHVlc4:x2sXC0s2iqU:NGTKsMW0UQU:bm2A6Je1cZE:1PTC_zuxrhI:jfFNA38bv78:IpEM57w4_NU:SdJ8EUoGANM:W1Uo5DQTtzc:IM-1FnJDtk8:fkPKXkloTX4:_uE7KlbWEQc:RojBCLlrZBo:CTAnNINWNqw
xlxmwvWekZA:vivalareason:597:Film & Animation:44:27244:4.74:77:42:JyU3mJAr-Tc:jfFNA38bv78:W1Uo5DQTtzc:RojBCLlrZBo:fkPKXkloTX4:7hOyO2KVd_I:_uE7KlbWEQc:fD0n-eXUH4g:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:CTAnNINWNqw:0LKZbvTRNVI:Z37h1mNQQ3U:vsF95bYZcB4:x2sXC0s2iqU:tHLFlEUZ508:Cw7t5eHVlc4
_uE7KlbWEQc:imrational:726:Comedy:71:2455:4.85:39:42:JyU3mJAr-Tc:jfFNA38bv78:W1Uo5DQTtzc:RojBCLlrZBo:fkPKXkloTX4:7hOyO2KVd_I:xlxmwvWekZA:fD0n-eXUH4g:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:vL7FcvEydqg:0LKZbvTRNVI:Z37h1mNQQ3U:vsF95bYZcB4:x2sXC0s2iqU:tHLFlEUZ508:Cw7t5eHVlc4
fD0n-eXUH4g:gonff:649:Gadgets & Games:125:712:3.47:15:8:JyU3mJAr-Tc:jfFNA38bv78:W1Uo5DQTtzc:RojBCLlrZBo:fkPKXkloTX4:7hOyO2KVd_I:xlxmwvWekZA:_uE7KlbWEQc:TlZ-ktadTTI:ePRlxk1jsvk:IpEM57w4_NU:SOi0KK-E8zs:Ptkt4DT14v8:0LKZbvTRNVI:Z37h1mNQQ3U:vsF95bYZcB4:x2sXC0s2iqU:tHLFlEUZ508:Cw7t5eHVlc4:e3jiIW1wl_g
TlZ-ktadTTI:therevfincham:567:Howto & DIY:417:2287:4.95:39:23:vL7FcvEydqg:jfFNA38bv78:8vzqspFxNe4:fkPKXkloTX4:_uE7KlbWEQc:W1Uo5DQTtzc:RojBCLlrZBo:JyU3mJAr-Tc:oTeQd0bL8TE:sLxa9oxs0DA:xlxmwvWekZA:vhbByP2fgjQ:fD0n-eXUH4g:ejYFKT4sYYM:ePRlxk1jsvk:H0xuFSlLuvE:ltJ98vbJprI:HQo63uIZt88:CTAnNINWNqw:9tH2s1GU3yQ
ePRlxk1jsvk:henderob:560:Comedy:21:4501:3.22:9:5:vhbByP2fgjQ:Z37h1mNQQ3U:SJzcN33-nTA:U921apEoUCQ:n9wkqrhqKZA:QBwyecI8dAM:vL7FcvEydqg:jfFNA38bv78:8vzqspFxNe4:fkPKXkloTX4:_uE7KlbWEQc:RojBCLlrZBo:JyU3mJAr-Tc:oTeQd0bL8TE:sLxa9oxs0DA:xlxmwvWekZA:TlZ-ktadTTI:ejYFKT4sYYM:fD0n-eXUH4g:H0xuFSlLuvE
IpEM57w4_NU:WhyIWearTheVeil: 0 : UNA :60:1450:3.73:37:31
SOi0KK-E8zs:FrozenWisdom:667:Film & Animation:19:404:5:5:2:vL7FcvEydqg:jfFNA38bv78:8vzqspFxNe4:fkPKXkloTX4:_uE7KlbWEQc:W1Uo5DQTtzc:RojBCLlrZBo:JyU3mJAr-Tc:oTeQd0bL8TE:sLxa9oxs0DA:xlxmwvWekZA:vhbByP2fgjQ:TlZ-ktadTTI:ejYFKT4sYYM:fD0n-eXUH4g:ePRlxk1jsvk:H0xuFSlLuvE:CTAnNINWNqw:9tH2s1GU3yQ:aB1rKbXTS_g
Ptkt4DT14v8:PhoenixesRose:525:News & Politics:62:5325:4.25:24:36:oTeQd0bL8TE:QbVj4TCnQCI:BvGOjcn9J4s:aUrOqrgdNxo:C-huhJgb_Yc:1GLsKwsZ4Mc:IZgmlToxrtA:wMVdfGw9ICY:yYCKV9j9iDE:we8lvUwTw4Q:GYiZLptt9iY:qEQZh_1Gg8o:OYSr3DUS6SM:6VCHmNTXlqU:TJ642QGq71o:gGSwve_BU9I:bRr5PUAR7y0:Vauo9QubndQ:zhqJZ47mf24:FSPZ_WwPa-U
vL7FcvEydqg:stevetures:454:Comedy:20:123984:4.75:297:149:NGTKsMW0UQU:jfFNA38bv78:8vzqspFxNe4:fkPKXkloTX4:_uE7KlbWEQc:W1Uo5DQTtzc:RojBCLlrZBo:JyU3mJAr-Tc:oTeQd0bL8TE:sLxa9oxs0DA:xlxmwvWekZA:vhbByP2fgjQ:TlZ-ktadTTI:ejYFKT4sYYM:fD0n-eXUH4g:ePRlxk1jsvk:H0xuFSlLuvE:9tH2s1GU3yQ:SOi0KK-E8zs:aB1rKbXTS_g
Z37h1mNQQ3U:henderob:560:Comedy:94:2091:4.67:24:14:JyU3mJAr-Tc:7hOyO2KVd_I:jfFNA38bv78:fD0n-eXUH4g:IpEM57w4_NU:W1Uo5DQTtzc:fkPKXkloTX4:xlxmwvWekZA:_uE7KlbWEQc:RojBCLlrZBo:TlZ-ktadTTI:ePRlxk1jsvk:0LKZbvTRNVI:SOi0KK-E8zs:vsF95bYZcB4:x2sXC0s2iqU:Ptkt4DT14v8:tHLFlEUZ508:Cw7t5eHVlc4:y2qPRxUuPhc
8vzqspFxNe4:mcdcrook:412:Comedy:20:10690:4.63:30:15:vL7FcvEydqg:jfFNA38bv78:fkPKXkloTX4:_uE7KlbWEQc:W1Uo5DQTtzc:RojBCLlrZBo:JyU3mJAr-Tc:TlZ-ktadTTI:oTeQd0bL8TE:sLxa9oxs0DA:xlxmwvWekZA:vhbByP2fgjQ:fD0n-eXUH4g:ejYFKT4sYYM:ePRlxk1jsvk:H0xuFSlLuvE:ltJ98vbJprI:HQo63uIZt88:9tH2s1GU3yQ:SOi0KK-E8zs
x2sXC0s2iqU:DayfallKat:734:News & Politics:38:129:5:3:10:qCC8ctaJq5Y:gbPMyy7DDHY:auueuhld5Ss
tHLFlEUZ508:Godlesspanther:735:Comedy:58:87:5:8:7

user.txt

metalproud,20,2
nj5544,1,0
nader626,1,0
paris3970316,6,18
munirayesh123,9,19
peachylicous101,4,6
nilafreaks,1,3
nato101010,13,49
metalsilver5000,5,5
mikele90,14,2
mario12475,6,59
raffirocks,136,18
rolfcopter1488,3,34
ngc4487,5,0
protogeist,13,5
mizzladybug,2,3
ruthgledhillfranks,43,1
pigsaregood,1,0
pinkpoodle11,1,1
madlyinsane01,7,0
rusty2spoon,1,8
olivertraylen,27,7
monimiki,2,0
rabapab23,12,13
rebeccabloomfield,1,0
marymoe72,3,2
rigbiggreg,1,0
match2020,6,0
mohdzaki77,74,0
ohconspiracy,66,14
penomenon,1,0
misskayhisgirl,6,3
rodlenamusicvideos,1,0
powerchairchick,2,4
made2disobey,1,0
picklecatcus,14,18
orangepopberlin,1,0
navijatt,4,18
rockfanatic92,0,17
misanthrope8181,0,0
radiocanta,20,1
ohxyeahxx,3,14
oneandonlybarbie,2,0
missraymondbrown,7,117
rianlsoares,5,0
monscooch,1,0
rapture00,4,0
makinmoooo,1,0
micherooo,6,1
nolimitnlr,11,0
oboesruleiloveunc,20,2
mckinleythueson333,54,25
mattern393,2,0
maximusfart,2,0
moyimus,7,8
miguetortu,10,0
mj1014,1,0
rockboa23,1,0
mayorbruno,2,0

数据说明:
表1-1 视频表
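(原文此处为图片,未能显示;以下字段按下文 video_ori 建表语句整理)
videoId string:视频id
uploader string:上传者
age int
category array<string>:视频类别,可有多个
length int
views int
rate float:视频评分
ratings int
comments int:评论数
relatedId array<string>:相关视频id,可有多个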
表1-2 用户表
(原文此处为图片,未能显示;以下字段按下文 video_user_ori 建表语句整理)
uploader string:上传者
videos int
friends int
原始数据示例:

qR8WRLrO2aQ:mienge:406:People & Blogs:599:2788:5:1:0:4UUEKhr6vfA:zvDPXgPiiWI:TxP1eXHJQ2Q:k5Kb1K0zVxU:hLP_mJIMNFg:tzNRSSTGF4o:BrUGfqJANn8:OVIc-mNxqHc:gdxtKvNiYXc:bHZRZ-1A-qk:GUJdU6uHyzU:eyZOjktUb5M:Dv15_9gnM2A:lMQydgG1N2k:U0gZppW_-2Y:dUVU6xpMc6Y:ApA6VEYI8zQ:a3_boc9Z_Pc:N1z4tYob0hM:2UJkU2neoBs

预处理之后的数据示例:

qR8WRLrO2aQ:mienge:406:People,Blogs:599:2788:5:1:0:4UUEKhr6vfA,zvDPXgPiiWI,TxP1eXHJQ2Q,k5Kb1K0zVxU,hLP_mJIMNFg,tzNRSSTGF4o,BrUGfqJANn8,OVIc-mNxqHc,gdxtKvNiYXc,bHZRZ-1A-qk,GUJdU6uHyzU,eyZOjktUb5M,Dv15_9gnM2A,lMQydgG1N2k,U0gZppW_-2Y,dUVU6xpMc6Y,ApA6VEYI8zQ,a3_boc9Z_Pc,N1z4tYob0hM,2UJkU2neoBs

1、对原始数据进行预处理,格式为上面给出的预处理之后的示例数据。
通过观察原始数据形式,可以发现,每个字段之间使用“:”分割,视频可以有多个视频类别,类别之间用“&”符号分割,且“&”的两边有空格字符,同时相关视频也是可以有多个,多个相关视频也是用“:”进行分割。为了分析数据时方便,我们首先进行数据重组清洗操作。
即:将每条数据的类别用“,”分割,同时去掉两边空格,多个“相关视频id”也使用“,”进行分割
数据预处理代码:
#Map类:

package com.czxy.demo01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Cleaning mapper for the raw {@code video.txt} records.
 *
 * <p>Each input line has the form
 * {@code videoId:uploader:age:category:length:views:rate:ratings:comments:related1:related2:...}.
 * The mapper rewrites it so that multiple categories ({@code "A & B"}) and the
 * related-video ids are comma separated, while the nine leading fields stay
 * colon separated. Lines with fewer than ten fields (no related-video ids)
 * are dropped.
 *
 * <p>Cleaned lines are not emitted through {@code context}; they are written
 * directly to a fixed file that is opened in {@link #setup} and closed in
 * {@link #cleanup}.
 */
public class Map extends Mapper<LongWritable, Text, Text, Text> {
    /** Number of leading fields (videoId .. comments) that keep ':' as separator. */
    private static final int LEADING_FIELD_COUNT = 9;

    static Configuration conf;
    static FileSystem fileSystem;
    static FSDataOutputStream output;

    /**
     * Opens the side file that receives the cleaned records.
     *
     * @param context task context supplying the job configuration
     * @throws IOException if the file system or the output file cannot be opened
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        conf = context.getConfiguration();
        fileSystem = FileSystem.get(conf);
        output = fileSystem.create(new Path("F:\\第三学期大数据资料\\day03~12月\\第五周\\day01\\vido\\aa\\video.txt"));
    }

    /**
     * Cleans one raw record and appends it to the side file.
     *
     * @param key     byte offset of the line (unused)
     * @param value   one raw line of {@code video.txt}
     * @param context task context (nothing is emitted through it)
     * @throws IOException if writing the cleaned line fails
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // " & " separates multiple categories; collapse it to a bare comma.
        String line = value.toString().replace(" & ", ",");

        String[] fields = line.split(":");
        if (fields.length <= LEADING_FIELD_COUNT) {
            // No related-video ids: the record is discarded.
            return;
        }

        // Offset of the first related-video id, derived from the field position.
        // Looking the id up by text (indexOf) would match an identical or empty
        // substring earlier in the line and turn leading ':' into ','.
        int relatedStart = LEADING_FIELD_COUNT; // one ':' follows each leading field
        for (int i = 0; i < LEADING_FIELD_COUNT; i++) {
            relatedStart += fields[i].length();
        }

        String cleaned = line.substring(0, relatedStart)
                + line.substring(relatedStart).replace(":", ",").trim()
                + "\n";
        // Explicit charset so the output does not depend on the platform default.
        output.write(cleaned.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Closes the side file opened in {@link #setup}.
     *
     * @param context task context (unused)
     * @throws IOException if closing the stream fails
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        if (output != null) {
            output.close();
        }
    }
}

#driver 驱动类:

package com.czxy.demo01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver that launches the {@code video.txt} cleaning job.
 *
 * <p>It configures {@link Map} as the mapper, removes a previous output
 * directory if one exists, runs the job and exits with a non-zero status
 * when the job fails.
 */
public class DataDriver  {
    /**
     * Configures and runs the cleaning job.
     *
     * @param args command-line arguments (unused; paths are fixed below)
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(true);
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataDriver.class);

        Path inputPath = new Path("F:\\第三学期大数据资料\\day03~12月\\第五周\\day01\\vido\\video.txt");
        FileInputFormat.addInputPath(job, inputPath);

        // The job refuses to start if the output directory already exists,
        // so remove the result of a previous run first.
        Path outputPath = new Path("F:\\out2");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(Map.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Surface a failed job through the process exit status.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}

2、把预处理之后的数据进行入库到hive中
2.1创建数据库和表
创建数据库名字为:video
创建原始数据表:
视频表:video_ori 用户表:video_user_ori
创建ORC格式的表:
视频表:video_orc 用户表:video_user_orc
给出创建原始表语句
创建video_ori视频表:

-- Raw (text) video table: fields are ':'-separated, the two array
-- columns use ',' between their elements.
create table video_ori(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by ":"
collection items terminated by ","
stored as textfile;

创建video_user_ori用户表:

-- Raw (text) user table: fields are ','-separated.
create table video_user_ori(
uploader string,
videos int,
friends int)
row format delimited
fields terminated by ","
stored as textfile;

请写出ORC格式的建表语句:
创建video_orc表

-- ORC copy of video_ori with the same columns.
create table video_orc(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by ":"
collection items terminated by ","
stored as orcfile;

创建video_user_orc表:

-- ORC copy of video_user_ori with the same columns.
create table video_user_orc(
uploader string,
videos int,
friends int)
row format delimited
fields terminated by ","
stored as orcfile;

2.2分别导入预处理之后的视频数据到原始表video_ori和导入原始用户表的数据到video_user_ori中
请写出导入语句:
video_ori:

-- Load the preprocessed video file from the local file system, replacing existing rows.
load data local inpath '/export/video.txt' overwrite into table video_ori;

video_user_ori:

-- Load the user file from the local file system, replacing existing rows.
load data local inpath '/export/user.txt' overwrite into table video_user_ori;

2.3从原始表查询数据并插入对应的ORC表中
请写出插入语句:
video_orc:

-- Copy every row of the text table into the ORC table.
insert into table video_orc
select videoId, uploader, age, category, length, views, rate, ratings, comments, relatedId
from video_ori;

video_user_orc:

-- Copy every row of the text user table into the ORC user table.
insert into table video_user_orc
select uploader, videos, friends
from video_user_ori;

3、对入库之后的数据进行hivesql查询操作
3.1从视频表中统计出视频评分为5分的视频信息,把查询结果保存到/export/rate.txt
请写出sql语句:

# Dump all videos rated 5.0 to a local text file.
hive -e "select * from video_orc where rate =5.0 " > /export/rate.txt
# Written this way, the output still needs filtering: hive -e prints array
# columns as ["a","b"], so replace the brackets and double quotes with spaces.
# '[' must be escaped, otherwise sed treats it as the start of a bracket expression.
sed -i -e 's/\[/ /g' -e 's/"/ /g' -e 's/\]/ /g' /export/rate.txt

第二种操作不用过滤:
insert overwrite local directory '/export/rate' row format delimited fields terminated by '\t' collection items terminated by ',' select * from video_orc where rate = 5;

3.2从视频表中统计出评论数大于100条的视频信息,把查询结果保存到/export/comments.txt
请写出sql语句:

# Dump all videos with more than 100 comments to a local text file.
hive -e "select * from video_orc where comments >100" > /export/comments.txt
# Written this way, the output still needs filtering: hive -e prints array
# columns as ["a","b"], so replace the brackets and double quotes with spaces.
# '[' must be escaped, otherwise sed treats it as the start of a bracket expression.
sed -i -e 's/\[/ /g' -e 's/"/ /g' -e 's/\]/ /g' /export/comments.txt

第二种操作不用过滤:
insert overwrite local directory '/export/comments' row format delimited fields terminated by '\t' collection items terminated by ',' select * from video_orc where comments >100;

4、把hive分析出的数据保存到hbase中
4.1创建hive对应的数据库外部表
请写出创建rate外部表的语句:

-- External table over the exported 5.0-rated videos:
-- tab-separated fields, ','-separated array elements.
create external table video_rate(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by "\t"
collection items terminated by ",";

请写出创建comments外部表的语句:

-- External table over the exported videos with more than 100 comments:
-- tab-separated fields, ','-separated array elements.
create external table video_comments(
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
row format delimited
fields terminated by "\t"
collection items terminated by ",";

4.2加载第3步的结果数据到外部表中
请写出加载语句到rate表:

-- Load the exported 5.0-rated videos into the external table.
load data local inpath '/export/rate.txt' into table video_rate;

请写出加载语句到comments表:

-- Load the exported videos with more than 100 comments into the external table.
load data local inpath '/export/comments.txt' into table video_comments;

4.3创建hive管理表与HBase进行映射
给出此步骤的语句
Hive中的rate,comments两个表分别对应hbase中的hbase_rate,hbase_comments两个表
创建hbase_rate表并进行映射:

-- Hive-managed table backed by the HBase table "hbase_rate".
-- "key" maps to the HBase row key; every other column is stored
-- in column family "data" under its own name.
create table hbase_rate(
key string,
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,data:videoId,data:uploader,data:age,data:category,data:length,data:views,data:rate,data:ratings,data:comments,data:relatedId")
tblproperties("hbase.table.name" = "hbase_rate");

创建hbase_comments表并进行映射:

-- Hive-managed table backed by the HBase table "hbase_comments".
-- "key" maps to the HBase row key; every other column is stored
-- in column family "data" under its own name.
create table hbase_comments(
key string,
videoId string,
uploader string,
age int,
category array<string>,
length int,
views int,
rate float,
ratings int,
comments int,
relatedId array<string>)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,data:videoId,data:uploader,data:age,data:category,data:length,data:views,data:rate,data:ratings,data:comments,data:relatedId")
tblproperties("hbase.table.name" = "hbase_comments");

4.4请写出通过insert overwrite select,插入hbase_rate表的语句

-- row_number() supplies the HBase row key for each video.
insert overwrite table hbase_rate select row_number() over(order by videoId) key,videoId,uploader,age,category,length,views,rate,ratings,comments,relatedId from video_rate;
-- Not usable: hbase_rate has 11 columns (key + 10) but video_rate only has 10,
-- so a plain "select *" does not match the target table.
-- insert into table hbase_rate select * from video_rate;

请写出通过insert overwrite select,插入hbase_comments表的语句

-- row_number() supplies the HBase row key for each video.
insert overwrite table hbase_comments select row_number() over(order by videoId) key,videoId,uploader,age,category,length,views,rate,ratings,comments,relatedId from video_comments;
-- Not usable: hbase_comments has 11 columns (key + 10) but video_comments only has 10,
-- so a plain "select *" does not match the target table.
-- insert into table hbase_comments select * from video_comments;

5.通过hbaseapi进行查询操作
5.1请使用hbaseapi 对hbase_rate表,按照startRowKey=1和endRowKey=100进行扫描查询出结果。
对应的hbase shell命令为:scan 'hbase_rate',{STARTROW=>'1',STOPROW=>'100'}
#hbaseAPI

 /**
  * Scans {@code hbase_rate} from start row "1" (inclusive) to stop row "100"
  * (exclusive) and prints each row key with its {@code data:age} value.
  *
  * <p>NOTE(review): HBase compares row keys as bytes, so with the string keys
  * written by the Hive insert this range matches keys such as "1" and "10",
  * not the numeric range 1..99 - confirm this is what the exercise intends.
  *
  * @throws Exception if the connection or the scan fails
  */
 public static void shu()throws Exception{
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

        byte[] family = Bytes.toBytes("data");
        byte[] qualifier = Bytes.toBytes("age");

        // try-with-resources closes scanner, table and connection even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("hbase_rate"))) {
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes("1"));
            scan.setStopRow(Bytes.toBytes("100"));
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println("key  "+Bytes.toString(result.getRow()));
                    System.out.println("age  "+Bytes.toString(result.getValue(family, qualifier)));
                }
            }
        }
       }

5.2请使用hbaseapi对hbase_comments表,只查询comments列的值。

package com.czxy.demo01;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

#hbaseAPI
/**
 * Prints the {@code data:comments} value of every row in the HBase table
 * {@code hbase_comments}.
 */
public class demo {
    /**
     * Scans {@code hbase_comments}, fetching only the {@code data:comments}
     * column, and prints each value on its own line.
     *
     * @throws Exception if the connection or the scan fails
     */
    public static void scandata() throws Exception {
        // Connect to the cluster through its ZooKeeper quorum.
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

        byte[] family = Bytes.toBytes("data");
        byte[] qualifier = Bytes.toBytes("comments");

        // try-with-resources closes scanner, table and connection even when the scan throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("hbase_comments"))) {
            // Restrict the scan to the single column that is printed,
            // instead of transferring every column of every row.
            Scan scan = new Scan();
            scan.addColumn(family, qualifier);
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println(Bytes.toString(result.getValue(family, qualifier)));
                }
            }
        }
    }

    /**
     * Entry point: runs {@link #scandata()}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the scan fails
     */
    public static void main(String[] args) throws Exception {
        scandata();
    }
}
发布了93 篇原创文章 · 获赞 288 · 访问量 18万+

猜你喜欢

转载自blog.csdn.net/qq_45765882/article/details/103805464
今日推荐