【Gmail to Office365 邮箱迁移踩坑总结】drive

(补)drive挺恶心的

1 OneDrive 本身用起来就各种不爽:丢文件、文件损坏、迁移过来的文件打不开(需要先下载到本地打开,再重新上传)……各种问题。
2 强烈吐槽 Office 的在线文档:Excel 在线版没有“数据分列”这个功能,CSV 用户真的是要骂街了。微软的 Office 在线版做得没有 Google 的好,丢人!
3 邮件中的附件是以 base64 编码保存在邮件里的 :) 不手动点击,附件不会进入 drive。下载邮件时,把附件的 base64 内容解码后写入文件,并以相应的附件名命名,就算是下载下来了。
4 我们使用本地向云端同步 AD 账号的方式,本地的 UPN 和线上的不同。员工离职之后会禁用该 AD 账号,然后线上会将该用户移动到“已删除用户”中,这个时候便无法使用 API 去下载该员工的 drive 数据。虽然已删除账号的数据 30 天之后才会被清空,但是这个时候已经无法通过 API 拿到了。这种情况可以考虑先恢复账号;恢复账号时如果不改 UPN,还会是之前的本地 UPN,这个时候 API URL 中要使用对应的 UPN 才能拿到用户的数据。
5 OneNote:OneNote 笔记本在 drive 中是一个文件夹,而不是单个 OneNote 文件,所以深度遍历文件夹时要注意 OneNote 文件。

样例代码如下(线上版本另外做过一些 UPN 兼容性修改):

class OfficeManager(object):
    """Export a user's OneDrive files and mailbox through Microsoft Graph.

    The class relies on names that are defined elsewhere in the module:
    ``requests``, ``base_dir`` (local export root) and the ``OFFICE_*``
    settings (token URL, Graph URL, client credentials, mail-address suffix).

    Everything is written below ``base_dir/<user_id>``; the ``*_zip_and_remove``
    methods then archive that directory and delete it.
    """

    # Seconds to wait for any single HTTP request. Without a timeout a stalled
    # connection would block the export forever.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        self.office_token_url = OFFICE_TOKEN_URL
        self.office_graph_url = OFFICE_GRAPH_URL

    @property
    def __basic_header(self):
        """Headers shared by every Graph API call."""
        return {
            "Content-Type": 'application/json'
        }

    @property
    def __token_header(self):
        """Basic headers plus a freshly requested ``Authorization`` value.

        When authentication fails the ``Authorization`` header is left out
        instead of sending the literal string ``"None"``.
        """
        headers = dict(self.__basic_header)
        token = self.__auth__()
        if token:
            headers["Authorization"] = str(token)
        return headers

    def __auth__(self):
        """Request an access token with the client-credentials settings.

        Returns:
            ``"<token_type> <access_token>"`` on success, ``None`` on any
            failure (network error, non-JSON reply, missing fields).
        """
        data = {
            "client_id": OFFICE_CLIENT_ID,
            "client_secret": OFFICE_CLIENT_SECRET,
            "grant_type": OFFICE_GRANT_TYPE,
            "scope": OFFICE_SCOPE,
            "resource": OFFICE_RESOURCE
        }
        try:
            # The body is sent form-encoded via ``data=``. The previous third
            # positional argument was bound to ``json=``, not ``headers=``,
            # so it is dropped rather than forwarded as a JSON content type.
            auth = requests.post(self.office_token_url, data=data,
                                 timeout=self.REQUEST_TIMEOUT)
            res = auth.json()
            return res.get("token_type") + ' ' + res.get("access_token")
        except Exception:
            return None

    @staticmethod
    def _safe_name(name):
        """Return ``name`` reduced to a single, safe path component.

        Path separators are replaced with ``_`` so a remote-supplied name can
        never point outside the target directory; empty names and the special
        entries ``.`` / ``..`` become ``_``.
        """
        cleaned = name.replace('/', '_').replace('\\', '_')
        if cleaned in ('', '.', '..'):
            return '_'
        return cleaned

    @staticmethod
    def _log_failure(action, user_id):
        """Log the exception currently being handled, with its traceback."""
        import logging  # local import: the module's import block is not visible here
        logging.getLogger(__name__).exception(
            "%s failed for user %s", action, user_id)

    def _user_dir(self, user_id):
        """Return ``base_dir/<user_id>``, creating the directory if needed."""
        # Plain concatenation (not os.path.join) so that an absolute-looking
        # user_id can never replace base_dir.
        path = base_dir + '/' + user_id
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    def _iter_pages(self, url):
        """Yield every JSON page of a Graph collection.

        Follows ``@odata.nextLink`` until the service stops returning one.
        """
        while url:
            payload = requests.get(url, headers=self.__token_header,
                                   timeout=self.REQUEST_TIMEOUT).json()
            yield payload
            url = payload.get("@odata.nextLink", "")

    def down(self, user_id, url, name):
        """Stream ``url`` into ``base_dir/<user_id>/<name>``.

        Args:
            user_id: Directory name below ``base_dir``.
            url: Download URL of the file.
            name: File name to store the content under (sanitised first).

        Returns:
            ``True`` when the file was written, ``False`` when the server
            answered with an HTTP error (nothing is written in that case, so
            an error page is never saved as if it were the file).
        """
        target = os.path.join(base_dir + '/' + user_id, self._safe_name(name))
        r = requests.get(url, stream=True, timeout=self.REQUEST_TIMEOUT)
        try:
            if not r.ok:
                return False
            with open(target, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        finally:
            # Streamed responses keep the connection open until closed.
            r.close()
        return True

    def _download_children(self, user_id, url):
        """Download every file listed at ``url``, descending into folders.

        NOTE: files from nested folders are all stored flat in the user's
        directory, so equal names in different folders overwrite each other.

        Returns:
            The list of download URLs that were handed to :meth:`down`.
        """
        download_list = []
        for page in self._iter_pages(url):
            for item in page.get("value", []):
                download_url = item.get("@microsoft.graph.downloadUrl", "")
                if download_url:
                    download_list.append(download_url)
                    self.down(user_id, download_url,
                              item.get("name", str(time.time())))
                elif item.get("id"):
                    # No download URL: treat the item as a container and
                    # list its children.
                    download_list += self._get_onedrivefolder(
                        user_id, item["id"])
        return download_list

    def get_onedrive_downlink(self, user_id):
        """Download the whole OneDrive of ``user_id``.

        Returns:
            ``[]`` when ``user_id`` is empty, ``True`` on success and
            ``False`` when any step raised (the error is logged).
        """
        if not user_id:
            return []
        try:
            self._user_dir(user_id)
            endpoint = '/users/' + user_id + OFFICE_MAIL_ADDRESS + '/drive/root/children'
            self._download_children(user_id, self.office_graph_url + endpoint)
            return True
        except Exception:
            self._log_failure("OneDrive export", user_id)
            return False

    def _get_onedrivefolder(self, user_id, itemid):
        """Download the content of drive item ``itemid`` recursively.

        Returns:
            The list of download URLs found below the item.
        """
        endpoint = '/users/' + user_id + OFFICE_MAIL_ADDRESS + '/drive/items/' + itemid + '/children'
        return self._download_children(user_id, self.office_graph_url + endpoint)

    def _save_attachment(self, folder_dir, attach):
        """Decode one attachment's ``contentBytes`` into ``folder_dir``.

        Attachments without ``contentBytes`` are skipped instead of leaving
        an empty file behind.

        Returns:
            ``True`` when a file was written, ``False`` when skipped.
        """
        content = attach.get("contentBytes")
        if not content:
            return False
        file_name = self._safe_name(
            (attach.get("id") or '') + '-' + (attach.get("name") or '')
            + str(time.time()))
        # Binary mode: the decoded payload is raw bytes, not text.
        with open(os.path.join(folder_dir, file_name), 'wb') as f:
            f.write(base64.b64decode(content))
        return True

    def get_mail(self, user_id):
        """Export all mail folders of ``user_id`` to disk.

        For every folder returned by ``/mailFolders`` a directory is created
        below the user's directory; each page of messages is stored as one
        JSON file and every attachment of messages matching
        ``hasAttachments eq true`` is decoded next to them.

        NOTE(review): only the folders listed by ``/mailFolders`` are
        visited; confirm whether child folders need a separate traversal.

        Returns:
            An empty list on success (kept for backward compatibility) or the
            string ``'Something Error'`` when any step raised (the error is
            logged).
        """
        import json  # local import: the module's import block is not visible here
        mail_list = []
        try:
            user_dir = self._user_dir(user_id)
            user_url = self.office_graph_url + '/users/' + user_id + OFFICE_MAIL_ADDRESS

            folder_list = []
            for page in self._iter_pages(user_url + '/mailFolders?$select=id,displayName'):
                folder_list += page.get('value', [])

            for folder in folder_list:
                # Computed once per folder so the message dump and the
                # attachments always land in the same directory, even when
                # the time-based default name is used.
                folder_dir = os.path.join(
                    user_dir,
                    self._safe_name(folder.get("displayName", str(time.time()))))
                if not os.path.exists(folder_dir):
                    os.mkdir(folder_dir)
                messages_url = user_url + '/mailFolders/' + folder.get("id", "") + '/messages'

                # One JSON document per page of messages.
                for page in self._iter_pages(messages_url):
                    page_path = os.path.join(folder_dir, str(time.time()) + '.json')
                    with open(page_path, 'w') as f:
                        json.dump(page, f)

                # Attachments of the messages that carry any.
                for page in self._iter_pages(messages_url + '?$filter=hasAttachments eq true'):
                    for mail in page.get("value", []):
                        attachments_url = messages_url + '/' + mail.get("id", "") + '/attachments?$top=999'
                        for attachment_page in self._iter_pages(attachments_url):
                            for attach in attachment_page.get("value", []):
                                self._save_attachment(folder_dir, attach)
            return mail_list
        except Exception:
            self._log_failure("Mail export", user_id)
            return 'Something Error'

    def _zip_and_remove(self, user_id, subdir, prefix):
        """Archive ``base_dir/<user_id>`` and delete it afterwards.

        The archive is written to ``base_dir/<subdir>/<prefix><user_id>.zip``
        with the external ``zip`` tool. The directory is removed only when
        ``zip`` succeeded, so a failed archive never costs the exported data.

        Returns:
            ``True`` when the archive was created and the directory removed,
            ``False`` when ``user_id`` is not a plain directory name or the
            archive could not be created.
        """
        import shutil  # local imports: the module's import block is not visible here
        import subprocess
        # An empty id, "..", or an id containing separators would make the
        # removal below hit base_dir itself or something outside of it.
        if not user_id or self._safe_name(user_id) != user_id:
            return False
        path = base_dir + '/' + user_id
        archive = "{}/{}/{}{}.zip".format(base_dir, subdir, prefix, user_id)
        try:
            # Argument list instead of a shell string: no quoting problems
            # and no shell injection through user_id.
            status = subprocess.call(["zip", "-r", archive, path])
        except OSError:
            # The zip executable is not available.
            return False
        if status != 0:
            return False
        shutil.rmtree(path, ignore_errors=True)
        return True

    def mail_zip_and_remove(self, user_id):
        """Archive the exported mail of ``user_id`` and delete the export.

        Returns:
            ``True`` on success, ``False`` when nothing was archived.
        """
        return self._zip_and_remove(user_id, "o365mail", "O365mail")

    def drive_zip_and_remove(self, user_id):
        """Archive the exported OneDrive of ``user_id`` and delete the export.

        Returns:
            ``True`` on success, ``False`` when nothing was archived.
        """
        return self._zip_and_remove(user_id, "o365drive", "O365drive")

猜你喜欢

转载自blog.csdn.net/CCyutaotao/article/details/80568414