matlab 读取txt文件以及进行数据处理

      周日做了信息论的小 project,差不多熬了一个晚上加周一的早上,终于利用 matlab 成功地读取了 txt 文件中的英文单词并做了简单的数据处理,现在进行简单的分享。
       百度经验:matlab如何读取txt文件:                     https://jingyan.baidu.com/article/b87fe19e6b478852183568e1.html
      
 代码:
  function [] = work3()
  % WORK3  Word-block statistics of harry1.txt, plotted for block sizes 1-3.
  %
  %   Reads 'harry1.txt' from the current folder / MATLAB path, lower-cases
  %   it, treats commas as separators and splits it into words on
  %   whitespace. For n = 1, 2, 3 the word sequence is cut into consecutive
  %   NON-overlapping blocks of n words (a trailing incomplete block is
  %   dropped), the occurrences of each distinct block are counted, and the
  %   ascending-sorted counts are passed to errH. The three resulting
  %   curves are drawn with stairs() as H/H(1) versus err.
  %
  %   Takes no inputs and returns nothing; side effects are clearing the
  %   command window, closing all figures and opening a new figure.
  %
  %   Raises work3:tooFewWords when the file holds fewer than 3 words.

  clc
  close all     % 'clear all' is not needed: a function starts with an empty workspace

  %% Read and tokenize the text
  txt = fileread('harry1.txt');
  txt = strrep(txt, ',', ' ');               % commas act as word separators
  txt = lower(txt);                          % count words case-insensitively
  tokens = reshape(strsplit(txt), [], 1);    % column cell array, one word per cell

  % strsplit returns '' tokens when the text starts or ends with whitespace
  % (e.g. a trailing newline); they are not words and must not be counted.
  tokens = tokens(~cellfun('isempty', tokens));
  if numel(tokens) < 3
      error('work3:tooFewWords', ...
            'harry1.txt must contain at least 3 words, found %d.', numel(tokens));
  end

  % Replace every word by an integer id so that blocks of words can be
  % compared as numeric rows instead of space-padded char rows.
  [~, ~, wordId] = unique(tokens);

  %% 1-, 2- and 3-word block statistics
  [err1, H1] = errH(blockCounts(wordId, 1));
  [err2, H2] = errH(blockCounts(wordId, 2));
  [err3, H3] = errH(blockCounts(wordId, 3));

  %% Plot
  figure
  stairs(err1, H1 / H1(1), 'r')
  titleName = ['N=', num2str(H1(1))];
  hold on                                    % one 'hold on' covers all later plots
  stairs(err2, H2 / H2(1), 'b')
  stairs(err3, H3 / H3(1), 'k')

  title(titleName, 'fontsize', 16, 'fontweight', 'bold');
  xlabel('误差', 'fontsize', 16, 'fontweight', 'bold');
  ylabel('H/N', 'fontsize', 16, 'fontweight', 'bold');
  legend('1-gram', '2-gram', '3-gram');

  end

  function counts = blockCounts(wordId, n)
  % BLOCKCOUNTS  Ascending occurrence counts of non-overlapping n-word blocks.
  %   wordId : column vector of positive integer word ids, in text order.
  %   n      : block length in words.
  %   counts : 1-by-K row vector, one entry per distinct block, sorted
  %            ascending.
  nBlocks = floor(numel(wordId) / n);                   % drop the incomplete tail
  blocks = reshape(wordId(1:n * nBlocks), n, nBlocks).'; % one block per row
  [~, ~, blockId] = unique(blocks, 'rows');
  counts = sort(accumarray(blockId, 1)).';              % exact integer tally per block
  end

  %%
  %% Error / log-size curve helper
  function [err,H]=errH(numOccurrences)
  % ERRH  Cumulative discarded probability and log2 of the remaining count.
  %
  %   [err, H] = errH(numOccurrences)
  %
  %   numOccurrences : vector of non-negative integer occurrence counts
  %                    (the caller passes them sorted ascending).
  %   err            : 1-by-(T+1) row vector, T = sum(numOccurrences).
  %                    err(1) = 0; every occurrence that is removed adds
  %                    p(r)/numOccurrences(r), where
  %                    p(r) = numOccurrences(r)/T.
  %   H              : 1-by-(T+1) row vector, H(k) = log2(T - (k-1)), i.e.
  %                    log2 of the number of occurrences still remaining.
  %                    The last element is log2(0) = -Inf.
  %
  %   NOTE(review): p(r)/numOccurrences(r) equals 1/T for every r, so the
  %   returned curves depend only on T and not on how the occurrences are
  %   distributed. Behaviour is kept as is - confirm this is intended.

  total = sum(numOccurrences);
  prob  = numOccurrences / total;

  % Preallocate instead of growing the outputs one element at a time.
  err = zeros(1, total + 1);
  H   = log2(total:-1:0);          % remaining count goes total, total-1, ..., 0

  k = 1;
  for r = 1:length(numOccurrences)
      step = prob(r) / numOccurrences(r);   % per-occurrence share of p(r)
      for n = 1:numOccurrences(r)
          k = k + 1;
          err(k) = err(k-1) + step;         % sequential sum, same order as before
      end
  end
  end
  70.   

猜你喜欢

转载自blog.csdn.net/qq_29468403/article/details/80681604