First, use Docker to start a MongoDB 4.0.13 container on Ubuntu 16.04. The basic operations are as follows:
hpl@hepengli:~$ sudo -i
[sudo] hpl 的密码:
root@hepengli:~# systemctl start docker
root@hepengli:~# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
root@hepengli:~# docker pull mongo:4.0.13
4.0.13: Pulling from library/mongo
976a760c94fc: Pull complete
c58992f3c37b: Pull complete
0ca0e5e7f12e: Pull complete
f2a274cc00ca: Pull complete
7f6568107a70: Pull complete
08957b2477b2: Pull complete
a66dbd57d2a3: Pull complete
135a9132a862: Pull complete
c9b7c17ba1df: Pull complete
7b595c9e65de: Pull complete
5e74b440cab6: Pull complete
9c04ed8f6ca0: Pull complete
3dbe824121e4: Pull complete
Digest: sha256:1aea2377c5b17dcd5e67d4c33f8f7ee09da62a4dd27ae988958b70efdf3a0a6a
Status: Downloaded newer image for mongo:4.0.13
docker.io/library/mongo:4.0.13
root@hepengli:~# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
mongo 4.0.13 0712bd00d695 2 years ago 416MB
root@hepengli:~# docker run -d -p 27017:27017 --name mongo4.0 0712bd00d695
1e81e661666a5aa01df516663058de8b768a7b09fafa5c53b832c14a6122a97a
root@hepengli:~# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
1e81e661666a 0712bd00d695 "docker-entrypoint.s…" 5 seconds ago Up 3 seconds 0.0.0.0:27017->27017/tcp, :::27017->27017/tcp mongo4.0
Check the IP address of the Linux host: 192.168.23.129
Navicat can be used on the host to test the connection:
Small files under 16 MB can be stored directly in MongoDB:
import os
import datetime
from pymongo import MongoClient
class DBDataSave(object):
    """Store and retrieve small files (< 16 MB) directly as MongoDB documents."""

    # pymongo connects lazily, so creating the client here does no I/O yet.
    client = MongoClient('mongodb://192.168.23.129:27017')
    db = client.file_data    # database to use
    collection = db.datas    # collection within the database

    def upload_little_file(self):
        """Insert each file in the upload directory as one MongoDB document.

        Each document stores the file name, extension, raw bytes, size in
        bytes, and the insertion timestamp.
        """
        upload_dir = r'G:/project/study1/others/mongo_test/upload_files'
        file_list = os.listdir(upload_dir)
        for file in file_list:
            file_path = upload_dir + '/' + file
            file_division = file.split('.')
            file_size = os.stat(file_path).st_size
            # Context manager guarantees the handle is closed even on error.
            with open(file_path, 'rb') as f:
                data = f.read()
            doc = {
                "file_name": file,
                # BUG FIX: key was misspelled "filet_type" in the original.
                "file_type": file_division[-1],
                'content': data,
                "size": file_size,
                'add_time': datetime.datetime.now(),
            }
            # Exactly one document per file, so insert_one is the right call
            # (the original wrapped the dict in a list and used insert_many).
            self.collection.insert_one(doc)

    def download_little_file(self):
        """Write every stored document back out as a file under ./download_files."""
        if not os.path.exists('./download_files'):
            os.mkdir('./download_files')
        files = self.collection.find()
        for file in files:
            with open('./download_files/' + f"{file.get('file_name')}", 'wb') as f:
                f.write(file.get('content'))
if __name__ == '__main__':
    saver = DBDataSave()
    # saver.upload_little_file()  # store files into MongoDB
    saver.download_little_file()  # export stored files to disk
MongoDB has a built-in file system called GridFS, which can be used to store files larger than 16M:
GridFS manages files in a distributed manner and stores them in segments (chunks), which breaks through the size limits of ordinary file systems. Unlike a regular file system that stores each file as a whole, this design avoids consuming large amounts of memory when reading big files.
from pymongo import MongoClient
from gridfs import *
import os
class DBDataSaveGF(object):
    """Store and retrieve files of any size via MongoDB's GridFS."""

    client = MongoClient('mongodb://192.168.23.129:27017')  # MongoDB in the Docker container
    db = client.file_data
    file_put = GridFS(db)  # GridFS wrapper over the database

    def db_write_file(self):
        """Upload every file in the upload directory into GridFS."""
        upload_path = r'G:/project/study1/others/mongo_test/upload_files'
        files = os.listdir(upload_path)  # list all files in the directory
        for file in files:
            # BUG FIX: the original joined with '\\' although upload_path
            # uses '/'; os.path.join is correct on every platform and
            # matches the sibling DBDataSave class.
            file_path = os.path.join(upload_path, file)
            with open(file_path, 'rb') as f1:
                self.file_put.put(f1, content_type=file.split('.')[-1], filename=file)

    def db_read_file(self):
        """Write every GridFS file back out under ./download_files."""
        if not os.path.exists('./download_files'):
            os.mkdir('./download_files')
        files = self.file_put.find()
        # files = file_put.find().sort("uploadDate", -1).limit(1)  # only the most recently uploaded file
        for file in files:
            with open('./download_files/' + file.filename, 'wb') as f1:
                f1.write(file.read())
if __name__ == '__main__':
    store = DBDataSaveGF()
    # store.db_write_file()  # store files into GridFS
    store.db_read_file()  # read files back out