-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathbaidupan.py
executable file
·92 lines (79 loc) · 3.24 KB
/
baidupan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
import argparse
import distutils.spawn
import json
import re
import shutil
import subprocess
import urllib.error
from http import cookiejar
from urllib import request
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.77.4 ' \
'(KHTML, like Gecko) Version/7.0.5 Safari/537.77.4'
# call wget to download, inspired by xunlei-lixian@github
def wget_download(download_url, file_name='', resume=False):
    """Download *download_url* by shelling out to ``wget``.

    Inspired by xunlei-lixian@github.

    Args:
        download_url: URL to fetch.
        file_name: optional output file name; when given, wget writes to it
            and also sends our browser User-Agent (some hosts reject the
            default wget UA).
        resume: when True, pass ``-c`` to continue a partial download.

    Raises:
        Exception: if wget is not installed or exits with a non-zero status.
    """
    wget_cmd = ['wget', download_url]
    if file_name != '':
        # subprocess runs without a shell, so the value must NOT be quoted:
        # embedded double quotes would become part of the UA header itself.
        wget_cmd.append('--user-agent=' + UA)
        wget_cmd.append('-O')
        wget_cmd.append(file_name)
    if resume:
        wget_cmd.append('-c')
    # shutil.which replaces distutils.spawn.find_executable (distutils was
    # removed in Python 3.12).  Raise instead of assert: asserts are
    # stripped under `python -O`.
    if shutil.which(wget_cmd[0]) is None:
        raise Exception(f"Cannot find {wget_cmd[0]}")
    exit_code = subprocess.call(wget_cmd)
    if exit_code != 0:
        raise Exception('Cannot call wget to download.')
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='BaiduPan Downloader')
    parser.add_argument('url', help='BaiduPan URL')
    parser.add_argument('--resume', action='store_true',
                        help='Resume getting a partially-downloaded file.')
    args = parser.parse_args()

    # Only pan.baidu.com share links are supported.
    if 'pan.baidu.com' not in args.url:
        raise Exception('URL must contain pan.baidu.com.')

    # Fetch the share page.  A cookie jar is required: the follow-up
    # /share/download request must carry the cookies set by this page.
    cookieJar = cookiejar.CookieJar()
    urlOpener = request.build_opener(request.HTTPCookieProcessor(cookieJar))
    urlOpener.addheaders = [('User-agent', UA)]
    print('Getting html data...')
    try:
        html = urlOpener.open(args.url).read().decode()
    except (urllib.error.URLError, urllib.error.HTTPError):
        # BUGFIX: `except A or B` evaluates `A or B` once (to A), so only
        # URLError was ever caught; a tuple catches both.
        raise Exception('Please check the URL.')
    # Sanity check that we got a real HTML document.  Raise instead of
    # assert: asserts are stripped under `python -O`.
    if '<head>' not in html:
        raise Exception('Cannot get correct html page.')

    def _scrape(pattern, what):
        """Return group 1 of *pattern* in the page, failing loudly if the
        page layout changed and the pattern no longer matches (the original
        crashed with AttributeError on m.group of None)."""
        m = re.search(pattern, html, re.UNICODE)
        if m is None:
            raise Exception('Cannot find ' + what + ' in the page; '
                            'the page layout may have changed.')
        return m.group(1)

    fn = _scrape(r'"server_filename":"(.+?)"', 'file name')
    fs_id = _scrape(r'"fs_id":(\d+),', 'fs_id')
    share_uk = _scrape(r'yunData\.SHARE_UK = "(\d+)";', 'share uk')
    share_id = _scrape(r'yunData\.SHARE_ID = "(\d+)";', 'share id')
    share_timestamp = _scrape(r'yunData\.TIMESTAMP = "(\d+)";', 'timestamp')
    share_sign = _scrape(r'yunData\.SIGN = "([0-9a-f]+)";', 'sign')

    # Ask the server for the real download link, inspired by
    # pan-baidu-download@github.
    # BUGFIX: the query previously read '×tamp=' (a mangled '&times;tamp'
    # from an HTML-entity collapse); the server expects '&timestamp='.
    purl = ('http://pan.baidu.com/share/download?channel=chunlei'
            '&clienttype=0&web=1'
            '&uk=' + share_uk +
            '&shareid=' + share_id +
            '&timestamp=' + share_timestamp +
            '&sign=' + share_sign)
    pdata = ('fid_list=["' + fs_id + '"]').encode('utf-8')
    jdata = json.load(urlOpener.open(purl, pdata))
    if not jdata.get('errno') and jdata.get('dlink') is not None:
        # Keep the link as str (the original encoded it to bytes, which
        # subprocess on Windows rejects as an argv element).
        dlink = jdata.get('dlink')
    else:
        raise Exception('Cannot get download link. Please try again later.')

    # Hand the actual transfer off to wget; one call covers both cases.
    wget_download(dlink, fn, args.resume)