(补)drive挺恶心的
1 onedrive本身的使用就各种不爽了 丢文件 文件损坏 迁移过来的文件打不开 需要本地下载一份打开再上传 。。。 各种问题
2 强烈吐槽 office的在线文档 excel没有数据分列这个功能 csv用户真的是要骂街了 微软的office做的没google的好 丢银!
3 邮件中的附件是以 base64 的形式存在邮件中的 :) 不手动点击,附件不会进入 drive。下载邮件时,把附件的 base64 内容解码后写入文件,并命名为相应的附件名,就算是下载下来了。
4 我们使用本地 AD 向云端同步账号的方式,本地的 UPN 和线上的不同。员工离职之后会禁用该 AD 账号,然后线上会将该用户移动到“已删除用户”中,这个时候便无法使用 API 去下载该员工的 drive 数据。虽然已删除账号的数据 30 天之后才会被清空,但是这个时候已经无法通过 API 拿到了。这种情况可以考虑先恢复账号;恢复账号时如果不改 UPN,它还会是之前的本地 UPN,这个时候 API URL 中要使用对应的 UPN 才能拿到用户的数据。
5 OneNote:OneNote 文件在 drive 中是一个文件夹,而不是单个 OneNote 文件,所以深度遍历文件夹时,要注意 OneNote 文件。
样例代码(线上版本做过一些 UPN 兼容性修改):
class OfficeManager(object):
    """Export a user's OneDrive files and mailbox through the Graph API.

    The class relies on names defined elsewhere in the module:
    ``OFFICE_TOKEN_URL``, ``OFFICE_GRAPH_URL``, ``OFFICE_CLIENT_ID``,
    ``OFFICE_CLIENT_SECRET``, ``OFFICE_GRANT_TYPE``, ``OFFICE_SCOPE``,
    ``OFFICE_RESOURCE``, ``OFFICE_MAIL_ADDRESS`` (appended to ``user_id`` to
    build the user part of every URL) and ``base_dir`` (root of the local
    working tree).  Everything is downloaded into ``base_dir/<user_id>``.
    """

    def __init__(self):
        self.office_token_url = OFFICE_TOKEN_URL
        self.office_graph_url = OFFICE_GRAPH_URL

    @property
    def __basic_header(self):
        """Headers shared by every Graph request."""
        return {
            "Content-Type": 'application/json'
        }

    @property
    def __token_header(self):
        """Basic headers plus an ``Authorization`` entry.

        NOTE: every access requests a fresh token through ``__auth__``.  When
        authentication fails the header value is the literal string "None".
        """
        token = dict(
            self.__basic_header, **{
                "Authorization": str(self.__auth__())
            })
        return token

    def __auth__(self):
        """Request an access token.

        Returns:
            ``"<token_type> <access_token>"`` on success, ``None`` when the
            request fails or the response lacks either field.
        """
        data = {
            "client_id": OFFICE_CLIENT_ID,
            "client_secret": OFFICE_CLIENT_SECRET,
            "grant_type": OFFICE_GRANT_TYPE,
            "scope": OFFICE_SCOPE,
            "resource": OFFICE_RESOURCE
        }
        try:
            # The header dict used to be passed as the third positional
            # argument, which is ``json=`` in requests.post and is ignored
            # whenever ``data`` is given.  The payload was therefore always
            # sent form-encoded; keep exactly that and drop the stray argument.
            auth = requests.post(self.office_token_url, data=data)
            res = auth.json()
            return res.get("token_type") + ' ' + res.get("access_token")
        except Exception:
            return None

    @staticmethod
    def _ensure_dir(path):
        """Create the directory *path* unless it already exists."""
        if not os.path.exists(path):
            os.mkdir(path)

    @staticmethod
    def _safe_name(name):
        """Replace path separators so *name* stays a single path component."""
        return name.replace('/', '_')

    def _iter_pages(self, url):
        """Yield every JSON page of a collection, following @odata.nextLink."""
        while url:
            page = requests.get(url, headers=self.__token_header).json()
            yield page
            url = page.get("@odata.nextLink", "")

    def down(self, user_id, url, name):
        """Stream *url* into ``base_dir/<user_id>/<name>``.

        Returns:
            Always ``True``; I/O and network errors propagate to the caller.
        """
        target = base_dir + '/' + user_id + '/' + name
        r = requests.get(url, stream=True)
        try:
            with open(target, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        finally:
            # Release the streamed connection even if writing fails.
            r.close()
        return True

    def _download_children(self, user_id, endpoint):
        """Download every file below a drive *endpoint*, recursing into folders.

        Items without a download URL are treated as containers and listed
        recursively.  All files land flat in ``base_dir/<user_id>``.

        Returns:
            The list of download URLs that were fetched.
        """
        download_list = []
        url = self.office_graph_url + '/users/' + user_id + OFFICE_MAIL_ADDRESS + endpoint
        # Children listings are paged; without following the next link any
        # folder larger than one page would be exported incompletely.
        for page in self._iter_pages(url):
            for item in page.get("value", []):
                download_url = item.get("@microsoft.graph.downloadUrl", "")
                if download_url:
                    download_list.append(download_url)
                    self.down(user_id, download_url, item.get("name", str(time.time())))
                else:
                    download_list += self._get_onedrivefolder(user_id, item.get("id", ""))
        return download_list

    def get_onedrive_downlink(self, user_id):
        """Download the whole OneDrive of *user_id* into ``base_dir/<user_id>``.

        Returns:
            ``[]`` when *user_id* is empty, ``True`` on success and ``False``
            when any step raises.
        """
        if not user_id:
            return []
        try:
            self._ensure_dir(base_dir + '/' + user_id)
            self._download_children(user_id, '/drive/root/children')
            return True
        except Exception:
            return False

    def _get_onedrivefolder(self, user_id, itemid):
        """Download everything below the drive item *itemid*.

        Returns:
            The list of download URLs that were fetched.
        """
        return self._download_children(user_id, '/drive/items/' + itemid + '/children')

    def _save_attachment(self, folder_path, attach):
        """Decode one attachment's base64 payload and write it to *folder_path*."""
        file_name = (
            self._safe_name(attach.get("id") or "") + '-' +
            self._safe_name(attach.get("name") or "") + str(time.time())
        )
        # The payload is binary: decode the base64 text directly and write in
        # binary mode so this works on Python 3 as well as Python 2.
        payload = base64.b64decode(attach.get("contentBytes") or "")
        with open(folder_path + '/' + file_name, 'wb') as f:
            f.write(payload)

    def get_mail(self, user_id):
        """Dump all messages and attachments of *user_id* to disk.

        Each mail folder gets a directory ``base_dir/<user_id>/<displayName>``.
        Every page of messages is written to a ``<timestamp>.json`` file and
        every attachment to ``<id>-<name><timestamp>``.

        NOTE: the message pages are written as the ``str()`` of the decoded
        response (a Python literal, not strict JSON); this on-disk format is
        kept unchanged.

        Returns:
            An empty list on success (no messages are collected in memory),
            or the string ``'Something Error'`` when any step raises.
        """
        mail_list = []
        try:
            user_dir = base_dir + '/' + user_id
            self._ensure_dir(user_dir)
            user_url = self.office_graph_url + '/users/' + user_id + OFFICE_MAIL_ADDRESS

            # Resolve each folder's local path once so both passes below use
            # the same directory, even when the time-based fallback is used.
            folders = []
            for page in self._iter_pages(user_url + '/mailFolders?$select=id,displayName'):
                for folder in page.get('value', []):
                    display_name = folder.get("displayName") or str(time.time())
                    folders.append(
                        (folder.get("id", ""), user_dir + '/' + self._safe_name(display_name))
                    )

            # Pass 1: raw message pages.
            for folder_id, folder_path in folders:
                self._ensure_dir(folder_path)
                messages_url = user_url + '/mailFolders/' + folder_id + '/messages'
                for page in self._iter_pages(messages_url):
                    with open(folder_path + '/' + str(time.time()) + '.json', 'w+') as f:
                        f.write(str(page).replace('\\n\\r', ''))

            # Pass 2: attachments of the messages that have any.
            for folder_id, folder_path in folders:
                folder_url = user_url + '/mailFolders/' + folder_id
                filtered_url = folder_url + '/messages?$filter=hasAttachments eq true'
                for page in self._iter_pages(filtered_url):
                    for mail in page.get("value", []):
                        attachment_url = (
                            folder_url + '/messages/' + mail.get("id", "") +
                            '/attachments?$top=999'
                        )
                        attachment = requests.get(attachment_url, headers=self.__token_header)
                        for attach in attachment.json().get("value", []):
                            self._save_attachment(folder_path, attach)
            return mail_list
        except Exception:
            return 'Something Error'

    def _zip_and_remove(self, user_id, archive_dir, archive_prefix):
        """Zip ``base_dir/<user_id>`` and delete it once the archive exists.

        The archive is ``base_dir/<archive_dir>/<archive_prefix><user_id>.zip``.

        Returns:
            ``True`` when the archive was created and the directory removed;
            ``False`` when *user_id* is empty or ``zip`` fails, in which case
            the downloaded data is left in place.
        """
        import shutil
        import subprocess

        # An empty user_id would make the path ``base_dir/`` and the cleanup
        # below would wipe the whole working tree, archives included.
        if not user_id:
            return False
        path = base_dir + '/' + user_id
        archive = "{}/{}/{}{}.zip".format(base_dir, archive_dir, archive_prefix, user_id)
        try:
            # Argument list instead of a shell string: user_id never reaches a
            # shell, and paths containing spaces work.
            status = subprocess.call(["zip", "-r", archive, path])
        except OSError:
            # The zip executable is missing or cannot be started.
            return False
        if status != 0:
            return False
        shutil.rmtree(path, ignore_errors=True)
        return True

    def mail_zip_and_remove(self, user_id):
        """Archive the user's directory under ``o365mail`` and remove it."""
        return self._zip_and_remove(user_id, "o365mail", "O365mail")

    def drive_zip_and_remove(self, user_id):
        """Archive the user's directory under ``o365drive`` and remove it."""
        return self._zip_and_remove(user_id, "o365drive", "O365drive")