C# 批量读取 Word 文档,把文字保存到 txt,同时可以保存图片

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Word;

namespace WindowsFormsApp2
{
    /// <summary>
    /// 主窗体类(用于获取word文档中的文字和图片)
    /// </summary>
    public partial class frmMain : Form
    {
        /// <summary>
        /// Background worker used to read a document's content off the UI thread.
        /// Created in the form's Load handler.
        /// </summary>
        private BackgroundWorker _readDocWorker = null;

        /// <summary>
        /// Path of the document currently being processed.
        /// </summary>
        private string _docPath = string.Empty;
        // Folder most recently picked in the folder-browser dialog.
        private string _dirPath = string.Empty;

        /// <summary>
        /// Absolute paths of the files collected by GetFilename for batch conversion.
        /// </summary>
        List<string> ListOfName = new List<string>();


        /// <summary>
        /// 窗体加载事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void FrmMain_Load(object sender, EventArgs e)
        {
            _readDocWorker = new BackgroundWorker();
            _readDocWorker.DoWork += _readDocWorker_DoWork;
            _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;             
        }


        /// <summary>
        /// 测试多线程处理
        /// </summary>
        /// <param name="filename"></param>
        private void ReadDoc(string filename)
        {
            if (File.Exists(filename))
            {
                BackgroundWorker _readDocWorker = new BackgroundWorker();
               
                _readDocWorker.DoWork += _readDocWorker_DoWork;
                _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
               
                _readDocWorker.RunWorkerAsync(filename);
            }

        }
        /// <summary>
        /// 选择文档按钮点击事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnChooseFile_Click(object sender, EventArgs e)
        {
            var openfile = new OpenFileDialog();

            openfile.Filter = "文档(*.doc;*.docx)|*.doc;*.docx";
            openfile.Title = "请选择文档";

            if (openfile.ShowDialog() == DialogResult.OK)
            {
                _docPath = openfile.FileName;
                this.richTxtBox.Text = "正在加载。。。";
                this.btnChooseFile.Enabled = false;

                _readDocWorker.RunWorkerAsync();
            }
            else
            {
                this.richTxtBox.Text = "请选择文档";
            }
        }

        /// <summary>
        /// 读取文档内容事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            
            var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

            //从文档中同步提取图片

            UtilsDocument.GetWordImageSync(_docPath, imgName);

            //读取文档中的文本内容

            var content = ReadWPSContent(_docPath);

            if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
            {
                StringBuilder sb = new StringBuilder(content);
                var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                StreamWriter sw = new StreamWriter(fs);

                sw.Write(content);

                sw.Close();
                fs.Close();
            }

            e.Result = content;

            Thread.Sleep(10000);
        }

        /// <summary>
        /// 读取文档内容完成事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            var content = (string)e.Result;

            if (!string.IsNullOrEmpty(content))
            {
                this.richTxtBox.Text = content;

                #region 显示进度
                try
                {
                    int i = Int16.Parse(progressBar.Tag.ToString());
                   
                    if (i != 0)
                    {
                        progressBar.Value += 100 / i;

                        num_lbl.Text = progressBar.Value.ToString();
                    }
                    else
                    {
                        num_lbl.Text = "0/0";
                    }
                }
                catch (Exception ee)
                {

                }
                #endregion

            }
            else
            {
                this.richTxtBox.Text = "读取失败";
            }

            this.btnChooseFile.Enabled = true;
        }

        /// <summary>
        /// 构造函数
        /// </summary>
        public frmMain()
        {
            InitializeComponent();

            this.Load += FrmMain_Load;
        }

        /// <summary>
        /// 读取WPS文档的内容(这里用的是WPS的API)
        /// </summary>
        /// <param name="docPath"></param>
        private string ReadWPSContent(string docPath)
        {
            //定义Word实例和文档实例

            var word = new Word.Application();
            var doc = new Word.Document();
            var txtContent = string.Empty;

            try
            {
                //设置打开文档的参数,这里是只读打开

                object name = docPath;
                object Range = System.Reflection.Missing.Value;
                object unknow = Type.Missing;
                object isReadOnly = true;

                //打开给定目录的文档

                word.Visible = false;

                doc = word.Documents.Open(ref name, ref unknow, ref isReadOnly, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow);

                //全选文档中的数据并复制到剪切板

                doc.ActiveWindow.Selection.WholeStory();
                doc.ActiveWindow.Selection.Copy();

                //获取当前剪贴板上的数据

                IDataObject data = null;

                if (this.InvokeRequired)
                {
                    this.Invoke((Action)delegate
                    {
                        data = Clipboard.GetDataObject();
                    });
                }
                else
                {
                    data = Clipboard.GetDataObject();
                }

                if (data != null)
                {
                    //获取文本类型数据

                    if (data.GetDataPresent(DataFormats.Text))
                    {
                        txtContent = (string)data.GetData(DataFormats.Text);
                    }
                    else
                    {
                        txtContent = string.Empty;
                    }
                }
                else
                {
                    txtContent = string.Empty;
                }
            }
            catch (Exception exc)
            {
                txtContent = string.Empty;
            }
            finally
            {
                if (doc != null)
                {
                    doc.Close();
                    doc = null;
                }

                if (word != null)
                {
                    word.Quit();
                    word = null;
                }
            }

            return txtContent;
        }

        /// <summary>
        /// 获取目录下的文件名称按钮
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnDirectChoose_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog fbd = new FolderBrowserDialog();
            
            fbd.SelectedPath = "D:\\003、历史项目\\219、业绩考评系统\\项目文档\\2019、2020年司法档案工作\\2019-完整数据";

            fbd.SelectedPath = "C:\\Users\\HUAWEI\\Desktop\\test";
            DialogResult result = fbd.ShowDialog();
            progressBar.Tag = 0;

            if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath))
            {
                _dirPath = fbd.SelectedPath;
                string[] files = GetFilename(_dirPath);
                if (null != files)
                {
                    for(int i = 0; i < files.Length; i++)
                    {
                        FileListBox.Items.Add(files[i]);
                        //ReadDoc(files[i]);
                        //break;
                    }
                }
                progressBar.Tag = files.Length;
                
            }

            conversion();

            //MessageBox.Show(_dirPath);
        }
        
        /// <summary>
        /// 获取目录下的所有的文件列表
        /// </summary>
        /// <param name="_dirPath"></param>
        /// <returns></returns>
        private string [] GetFilename (string _dirPath)
        {
            string[] files =null;
            
            DirectoryInfo dire = new DirectoryInfo(_dirPath);
            FileInfo[] fileinfo = dire.GetFiles();
            
            files = new string[fileinfo.Length];

            for (int i = 0; i < fileinfo.Length; i++)
            {
                files[i]=fileinfo[i].FullName;
                ListOfName.Add(fileinfo[i].FullName);
            }
            return files;
        }

        /// <summary>
        /// 批量转换
        /// </summary>
        private void conversion()
        {
            for (int i = 0; i < ListOfName.Count; i++)
            {
                _docPath = ListOfName[i];

                /*
                if (i == 0)
                {
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                else
                {
                    _readDocWorker = null;
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                


                _readDocWorker.RunWorkerAsync();
                */

                var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

                var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

                //从文档中同步提取图片

                UtilsDocument.GetWordImageSync(_docPath, imgName);

                //读取文档中的文本内容

                var content = ReadWPSContent(_docPath);

                if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
                {
                    StringBuilder sb = new StringBuilder(content);
                    var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                    FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                    StreamWriter sw = new StreamWriter(fs);

                    sw.Write(content);

                    sw.Close();
                    fs.Close();
                }


            }
                
        }

    }
}

猜你喜欢

转载自blog.csdn.net/qq_14874791/article/details/113857308