C#: batch-read Word documents and save their text to .txt files (embedded pictures can be saved as well)

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Word;

namespace WindowsFormsApp2
{
    /// <summary>
    /// Main form class (used to get text and pictures in word documents)
    /// </summary>
    public partial class frmMain : Form
    {
        /// <summary>
        /// Background worker that reads the document content off the UI thread.
        /// Created in the form's Load handler.
        /// </summary>
        private BackgroundWorker _readDocWorker = null;

        /// <summary>
        /// Path of the document currently being processed.
        /// </summary>
        private string _docPath = string.Empty;

        /// <summary>
        /// Directory most recently chosen for batch conversion.
        /// </summary>
        private string _dirPath = string.Empty;

        /// <summary>
        /// Absolute paths of the files queued for batch conversion.
        /// </summary>
        private readonly List<string> ListOfName = new List<string>();

 


        /// <summary>
        /// 窗体加载事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void FrmMain_Load(object sender, EventArgs e)
        {
            _readDocWorker = new BackgroundWorker();
            _readDocWorker.DoWork += _readDocWorker_DoWork;
            _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;             
        }


        /// <summary>
        /// 测试多线程处理
        /// </summary>
        /// <param name="filename"></param>
        private void ReadDoc(string filename)
        {
            if (File.Exists(filename))
            {
                BackgroundWorker _readDocWorker = new BackgroundWorker();
               
                _readDocWorker.DoWork += _readDocWorker_DoWork;
                _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
               
                _readDocWorker.RunWorkerAsync(filename);
            }

        }
        /// <summary>
        /// Select the document button click event
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></ param>
        private void btnChooseFile_Click(object sender, EventArgs e)
        {             var openfile = new OpenFileDialog();

            openfile.Filter = "Document(*.doc;*.docx)|*.doc;*.docx";
            openfile.Title = "Please select a document";

            if (openfile.ShowDialog() == DialogResult.OK)
            {
                _docPath = openfile.FileName;
                this.richTxtBox.Text = "正在加载。。。";
                this.btnChooseFile.Enabled = false;

                _readDocWorker.RunWorkerAsync();
            }
            else
            {                 this.richTxtBox.Text = "Please select a document";             }         }


        /// <summary>
        /// 读取文档内容事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            
            var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

            //Extract pictures synchronously from the document

            UtilsDocument.GetWordImageSync(_docPath, imgName);

            //Read the text content in the document

            var content = ReadWPSContent(_docPath);

            if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
            {
                StringBuilder sb = new StringBuilder(content);
                var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                StreamWriter sw = new StreamWriter(fs);

                sw.Write(content);

                sw.Close();
                fs.Close();
            }

            e.Result = content;

            Thread.Sleep(10000);
        }

        /// <summary>
        /// 读取文档内容完成事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            var content = (string)e.Result;

            if (!string.IsNullOrEmpty(content))
            {
                this.richTxtBox.Text = content;

                #region 显示进度
                try
                {
                    int i = Int16.Parse(progressBar.Tag.ToString());
                   
                    if (i != 0)
                    {
                        progressBar.Value += 100 / i;

                        num_lbl.Text = progressBar.Value.ToString();
                    }
                    else
                    {
                        num_lbl.Text = "0/0";
                    }
                }
                catch (Exception ee)
                {

                }
                #endregion

            }
            else
            {                 this.richTxtBox.Text = "Failed to read";             }

            this.btnChooseFile.Enabled = true;
        }

        /// <summary>
        /// Constructor
        /// </summary>
        public frmMain()
        {             InitializeComponent();

            this.Load += FrmMain_Load;
        }

        /// <summary>
        /// Read the content of the WPS document (WPS API is used here)
        /// </summary>
        /// <param name="docPath"></param>
        private string ReadWPSContent( string docPath)
        {             //Define Word instance and document instance

            var word = new Word.Application();
            var doc = new Word.Document();
            var txtContent = string.Empty;

            try
            {                 //Set the parameters to open the document, here is read-only open

                object name = docPath;
                object Range = System.Reflection.Missing.Value;
                object unknow = Type.Missing;
                object isReadOnly = true;

                //Open the document in the given directory

                word.Visible = false;

                doc = word.Documents.Open(ref name, ref unknow, ref isReadOnly, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow);

                //Select all the data in the document and copy it to the clipboard

                doc.ActiveWindow.Selection.WholeStory();
                doc.ActiveWindow.Selection.Copy();

                //Get the data on the current clipboard

                IDataObject data = null;

                if (this.InvokeRequired)
                {
                    this.Invoke((Action)delegate
                    {
                        data = Clipboard.GetDataObject();
                    });
                }
                else
                {
                    data = Clipboard.GetDataObject();
                }

                if (data != null)
                {                     //Get text type data

                    if (data.GetDataPresent(DataFormats.Text))
                    {
                        txtContent = (string)data.GetData(DataFormats.Text);
                    }
                    else
                    {
                        txtContent = string.Empty;
                    }
                }
                else
                {
                    txtContent = string.Empty;
                }
            }
            catch (Exception exc)
            {
                txtContent = string.Empty;
            }
            finally
            {
                if (doc != null)
                {
                    doc.Close();
                    doc = null;
                }

                if (word != null)
                {
                    word.Quit();
                    word = null;
                }
            }

            return txtContent;
        }

        /// <summary>
        /// Get the file name button in the directory
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e">< /param>
        private void btnDirectChoose_Click(object sender, EventArgs e)
        {             FolderBrowserDialog fbd = new FolderBrowserDialog();             fbd.SelectedPath = "D:\\003, historical projects\\219, performance evaluation system\\project documents\\2019, Judicial Archives Work in 2020\\2019-Complete Data";

            

            fbd.SelectedPath = "C:\\Users\\HUAWEI\\Desktop\\test";
            DialogResult result = fbd.ShowDialog();
            progressBar.Tag = 0;

            if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath))
            {
                _dirPath = fbd.SelectedPath;
                string[] files = GetFilename(_dirPath);
                if (null != files)
                {
                    for(int i = 0; i < files.Length; i++)
                    {
                        FileListBox.Items.Add(files[i]);
                        //ReadDoc(files[i]);
                        //break;
                    }
                }
                progressBar.Tag = files.Length;
                
            }

            conversion();

            //MessageBox.Show(_dirPath);
        }
        
        /// <summary>
        /// Get a list of all files in the directory
        /// </summary>
        /// <param name="_dirPath"></param>
        / // <returns></returns>
        private string [] GetFilename (string _dirPath)
        {             string[] files =null;             DirectoryInfo dire = new DirectoryInfo(_dirPath);             FileInfo[] fileinfo = dire.GetFiles();             files = new string [fileinfo.Length];

            


            

            for (int i = 0; i < fileinfo.Length; i++)
            {
                files[i]=fileinfo[i].FullName;
                ListOfName.Add(fileinfo[i].FullName);
            }
            return files;
        }

        /// <summary>
        /// 批量转换
        /// </summary>
        private void conversion()
        {
            for (int i = 0; i < ListOfName.Count; i++)
            {
                _docPath = ListOfName[i];

                /*
                if (i == 0)
                {
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                else
                {
                    _readDocWorker = null;
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                


                _readDocWorker.RunWorkerAsync();
                */

                var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

                var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

                //Extract pictures synchronously from the document

                UtilsDocument.GetWordImageSync(_docPath, imgName);

                //Read the text content in the document

                var content = ReadWPSContent(_docPath);

                if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
                {
                    StringBuilder sb = new StringBuilder(content);
                    var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                    FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                    StreamWriter sw = new StreamWriter(fs);

                    sw.Write(content);

                    sw.Close();
                    fs.Close();
                }


            }
                
        }

    }
}

 

Guess you like

Source: blog.csdn.net/qq_14874791/article/details/113857308