This repository has been archived on 2021-11-25. You can view files and clone it, but cannot push or open issues or pull requests.
PDFSync/sync.py

121 lines
3.4 KiB
Python
Raw Permalink Normal View History

2018-05-11 14:36:36 +08:00
# PDF Sync Manager
# Created by Kiritow.
import io
import os
import platform
from ftplib import FTP
from getpass import getpass
from hashlib import md5
def ScanPDF(root_dir):
    """Recursively collect the PDF files found below *root_dir*.

    Only names ending in the lowercase suffix ``.pdf`` are matched.

    Returns:
        A list of ``[file_name, full_path]`` pairs, in ``os.walk`` order.
    """
    return [
        [entry, os.path.join(folder, entry)]
        for folder, _subdirs, entries in os.walk(root_dir)
        for entry in entries
        if entry.endswith(".pdf")
    ]
def GetMD5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in 4 KiB chunks so large files are never loaded into
    memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = md5()
    # The context manager guarantees the handle is closed even when a read
    # fails part-way through.
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the empty bytes object read() returns
        # at end of file.
        for chunk in iter(lambda: f.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()
def CheckPDF(lst):
    """Drop files whose content duplicates an earlier entry of *lst*.

    Args:
        lst: sequence of entries whose first two items are the file name and
            the file path (the shape produced by ScanPDF).

    Returns:
        A list of ``[name, path, md5_hex]`` entries, one per distinct MD5
        digest, keeping the first file seen for each digest. A message is
        printed for every skipped duplicate.
    """
    seen_digests = set()  # set membership keeps the scan linear in len(lst)
    unique = []
    for entry in lst:
        name, path = entry[0], entry[1]
        digest = GetMD5(path)
        if digest in seen_digests:
            print('md5 same: ' + name + '. Skipped.')
            continue
        seen_digests.add(digest)
        unique.append([name, path, digest])
    return unique
def SyncPDF(uinfo,lst):
    """Upload every PDF in *lst* that the FTP server does not track yet.

    The server's ``/md5`` directory holds one marker file per uploaded
    document: the marker is named after the document's MD5 digest and its
    content is the remote file name, encoded as UTF-8. A document counts as
    synced when a marker with its digest already exists.

    Args:
        uinfo: sequence ``(server_address, username, password)``.
        lst: iterable of ``(name, path, md5_hex)`` entries, as returned by
            CheckPDF.

    The function prints progress, asks for confirmation on stdin before
    uploading (only the exact answer ``Y`` proceeds) and returns None.
    Errors raised by ``ftplib`` or by local file access propagate to the
    caller; the FTP connection is closed in every case.
    """
    def to_mb(byte_count):
        return round(byte_count/1024/1024,2)

    # The context manager sends QUIT / closes the socket on every exit path,
    # including the early returns and exceptions below.
    with FTP(uinfo[0]) as ftp:
        ftp.encoding='UTF-8'
        ftp.login(uinfo[1],uinfo[2])

        ftp.cwd('/md5')
        tracked_digests=set(ftp.nlst())

        pending=[]
        pending_bytes=0
        for idx,(name,addr,check) in enumerate(lst,1):
            if check in tracked_digests:
                print('[' + str(idx) + '][Synced] ' + name)
                continue
            print('[' + str(idx) + '][Untracked] ' + name)
            size=os.path.getsize(addr)
            pending.append((name,addr,check,size))
            pending_bytes+=size

        if not pending:
            print('Nothing to upload.')
            return

        total=len(pending)
        print('Totoally ' + str(total) + ' files need to upload. '
              + 'Need to upload: ' + str(to_mb(pending_bytes)) + 'MB')
        choice=input('Are you sure to upload? (Y/N): ')
        if(choice!='Y'):
            print('Aborted.')
            return

        ftp.cwd('/')
        taken_names=set(ftp.nlst())

        uploaded_bytes=0
        for idx,(name,addr,check,size) in enumerate(pending,1):
            print('[' + str(idx) + '/' + str(total) + '][Uploading] '
                  + name + '...')

            # Pick a remote name that is not in use yet by appending
            # underscores to the stem ("a.pdf" -> "a_.pdf" -> "a__.pdf").
            # Working on the stem always makes progress, so the loop
            # terminates even for names without a ".pdf" part.
            stem,ext=os.path.splitext(name)
            remote_filename=name
            while remote_filename in taken_names:
                stem+='_'
                remote_filename=stem+ext

            print('file size: ' + str(to_mb(size)) + 'MB')
            with open(addr,'rb') as fp:
                ftp.storbinary('STOR '+remote_filename,fp)
            # Remember the name so a later file in this same run with an
            # identical base name does not overwrite this upload.
            taken_names.add(remote_filename)

            uploaded_bytes+=size
            # Guard against a batch made only of empty files.
            if pending_bytes:
                percent=round(uploaded_bytes/pending_bytes*100,2)
            else:
                percent=100.0
            print(str(to_mb(uploaded_bytes)) + 'MB uploaded. ('
                  + str(percent) + '%)')

            # MD5 file content must be utf-8. It is sent from memory so no
            # temporary file is left behind if the transfer fails.
            marker=io.BytesIO(remote_filename.encode('utf-8'))
            ftp.storbinary('STOR /md5/'+check,marker)
            print('Check file ' + check + ' updated.')
2018-05-11 14:36:36 +08:00
def FetchInfo():
    """Prompt the user for the FTP connection details.

    Returns:
        A tuple ``(server_address, username, password)`` of the strings
        entered by the user.
    """
    svaddr=input('Server Addr:')
    uname=input('FTP Username:')
    # getpass reads the password without echoing it to the terminal.
    upass=getpass('FTP Password:')
    return (svaddr,uname,upass)
# Main Program
print("""PDF Sync Manager
Author: Kiritow""")

# Step 1: find the PDFs under the chosen directory and drop duplicates.
search_dir = input('Enter root directory to search :')
print('Scanning...')
tlst = ScanPDF(search_dir)
clst = CheckPDF(tlst)
print('[Scan Result] ', len(tlst), ' PDF found. ',
      len(clst), ' unique PDF.', sep='')

# Step 2: ask for the FTP credentials and upload whatever is missing.
print('Syncing...')
uinfo = FetchInfo()
SyncPDF(uinfo, clst)