/** * Get the category id string of the fresh food in Yihaodian * @param filePath * @return */ public String getYHDSXCategoryIdStr(String filePath) { final String DELIMITER = new String(new byte[]{1}); final String INNER_DELIMITER = ","; // loop through all files in the directory BufferedReader br = null; try { FileSystem fs = FileSystem.get(new Configuration()); FileStatus[] status = fs.listStatus(new Path(filePath)); for (FileStatus file : status) { if (!file.getPath().getName().startsWith("part-")) { continue; } FSDataInputStream inputStream = fs.open(file.getPath()); br = new BufferedReader(new InputStreamReader(inputStream)); String line = null; while (null != (line = br.readLine())) { String[] strs = line.split(DELIMITER); String categoryId = strs[0]; String categorySearchName = strs[9]; if (-1 != categorySearchName.indexOf("0-956955")) { yhdsxCategoryIdStr += (categoryId + INNER_DELIMITER); } }// end of while } } catch (IOException e) { e.printStackTrace (); } finally { try { br.close(); } catch (IOException e) { e.printStackTrace (); } } return yhdsxCategoryIdStr; }
Directories and files on HDFS:
-bash-3.2$ hadoop fs -ls /user/hive/warehouse/category
Found 2 items
-rw-r--r--   2 basicdata supergroup          0 2014-08-18 01:05 /user/hive/warehouse/category/_SUCCESS
-rw-r--r--   2 basicdata supergroup    1117233 2014-08-18 01:05 /user/hive/warehouse/category/part-m-00000
File contents:
-bash-3.2$ hadoop fs -cat /user/hive/warehouse/category/part-* | more 52880000052880 Cereals Combination 51621010-5135-5162-5288: Food-Grain Oil-Miscellaneous Grains Combination 011 9726780009726787 Tea Set 19650290040-950340-965029-972678:Home-Tableware-Water-Tea Set 9701550009701557 Fiskars 9702350009702356 Living Services 000180-970235: Living Services 011 9702360009702367SIM Card 011 5315T602 Personal Cleaning 5183190null0-5134-5183-5315: Kitchen Cleaning - Cleaners - Personal Cleaning 111 5316T603 Household Cleaning 5183190null0-5134-5183-5316: Kitchen Cleaning-Cleaner-Household Cleaning 111 5317UD02 Egg Products 1516110null0-5135-5161-5317: Food and Beverage - Pickled Products 1 - Egg Products 1111 5318UD030 Meat Products 5161180null0-5135-5161-5318: Food and Beverage - Preserved Products - Meat Products 111