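'''
python 3.6, 3.7
pip install httpx loguru PyJWT
# Wenquan Xuetang (文泉学堂, wqxuetang.com)
# no arguments: test book 3208943 ('Python+TensorFlow机器学习实战', '248'), page 1
python fetch_png.py
# explicit bookid and page: page 10 of book 3208943
python fetch_png.py 3208943 10
'''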
# from typing import Union, Tuple
from pathlib import Path
from time import time
import json
import httpx
import jwt
from loguru import logger
# Secret handed to jwt.encode (HS256) in gen_jwt_token to sign page tokens.
JWT_SECRET = 'g0NnWdSE8qEjdMD8a1aq12qEYphwErKctvfd3IktWHWiOBpVsgkecur38aBRPn2w'
# Single HTTP client shared by every request made in this module.
SESS = httpx.Client()
# Base URL all endpoint paths below are appended to.
URL = 'https://lib-nuanxin.wqxuetang.com'
# Runs at import time and discards the response; presumably primes the
# session (e.g. cookies) before the API calls -- TODO confirm.
# NOTE(review): not wrapped in try/except, so a network failure here
# propagates out of the import itself.
SESS.get(URL)
# def gen_jwt_key(self):
def gen_jwt_key(bookid):
    """Fetch the per-book key that gets embedded in the page-image JWT.

    Sends ``GET {URL}/v1/read/k?bid=<bookid>`` through the shared ``SESS``
    client and returns the ``data`` member of the JSON reply.

    Args:
        bookid: Book identifier (int or str); interpolated into the query
            string as-is.

    Returns:
        The value stored under ``"data"`` in the JSON response.

    Raises:
        RuntimeError: If the request fails, the server answers with an error
            status, or the reply carries no usable ``data`` member.
    """
    url = f'{URL}/v1/read/k?bid={bookid}'

    try:
        resp = SESS.get(url)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
        # The error text used to be *returned* here, so the caller signed it
        # into the JWT as if it were a real key. Fail loudly instead, the same
        # way a missing ``data`` member already does below.
        raise RuntimeError(
            f'unable to fetch jwt key for {bookid}: {exc}'
        ) from exc

    try:
        jdata = resp.json()
    except Exception as exc:
        logger.warning(exc)
        jdata = {}

    # A JSON body that is not an object (list, string, ...) has no ``.get``.
    res = jdata.get('data') if isinstance(jdata, dict) else None
    if res is None:
        raise RuntimeError('returned None, something is not right...')
    return res
# def gen_jwt_token(self, page):
def gen_jwt_token(bookid, page=1):
    """Build the signed JWT that authorises fetching one page image.

    The payload carries the page number (``p``), the current time in
    milliseconds at one-second resolution (``t``), the book id as a string
    (``b``), the constant ``w`` = 1000, the JSON-encoded key obtained from
    ``gen_jwt_key`` (``k``) and the issue time in seconds (``iat``). It is
    signed with ``JWT_SECRET`` using HS256.

    Args:
        bookid: Book identifier (int or str).
        page: Page number to request; defaults to 1.

    Returns:
        The encoded token as a ``str``.

    Raises:
        Whatever ``gen_jwt_key`` raises when the key cannot be obtained.
    """
    # Timestamp is taken before the key request, as the claims are meant to
    # describe when the token build started.
    now = int(time())
    jwtkey = gen_jwt_key(bookid)

    # Key order is kept stable: it determines the serialised payload.
    payload = {
        "p": page,
        "t": now * 1000,
        "b": str(bookid),
        "w": 1000,  # NOTE(review): meaning not visible here, possibly image width
        "k": json.dumps(jwtkey),
        "iat": now,
    }
    token = jwt.encode(payload, JWT_SECRET, algorithm='HS256')

    # PyJWT 1.x returns bytes, PyJWT >= 2.0 returns str; an unconditional
    # .decode() raises AttributeError on the latter.
    if isinstance(token, bytes):
        token = token.decode('ascii')
    return token
# def bookinfo(self):
def bookinfo(bookid):
    """Return ``(name, canreadpages)`` for a book.

    Sends ``GET {URL}/v1/read/initread?bid=<bookid>`` through the shared
    ``SESS`` client and reads both fields from the ``data`` member of the
    JSON reply. On success the full decoded reply is also stored on
    ``bookinfo.jdata`` for later inspection.

    Args:
        bookid: Book identifier (int or str).

    Returns:
        A 2-tuple ``(data.get('name'), data.get('canreadpages'))``; either
        element is ``None`` when the server omits that field.

    Raises:
        RuntimeError: If the request fails, the body is not JSON, or the
            reply has no ``data`` member.
    """
    url = f'{URL}/v1/read/initread?bid={bookid}'

    # The old code fabricated a 499 response via ``httpx.models.Request`` just
    # so that ``.json()`` would fail and yield an empty dict. ``httpx.models``
    # is not a public module in current httpx, so that path crashed before any
    # request was sent. Going straight to the empty dict gives the same result.
    jdata = {}
    try:
        resp = SESS.get(url)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
    else:
        try:
            jdata = resp.json()
        except Exception as exc:
            logger.warning(exc)

    # A JSON body that is not an object (list, string, ...) has no ``.get``.
    data = jdata.get('data') if isinstance(jdata, dict) else None
    if data is None:
        raise RuntimeError('returned None, something is not right...')

    bookinfo.jdata = jdata
    return data.get('name'), data.get('canreadpages')
# async def download_img(self, page, task_id):
def fetch_png(bookid, page=1):
    """Download one page image of a book and return its raw bytes.

    Builds a token with ``gen_jwt_token`` and requests
    ``{URL}/page/img/<bookid>/<page>?k=<token>`` with a browser-like
    ``referer`` and ``user-agent``. The response object that was used is
    stored on ``fetch_png.resp`` for later inspection.

    Args:
        bookid: Book identifier (int or str).
        page: Page number to fetch; defaults to 1.

    Returns:
        The response body as ``bytes``. When the request fails, this is the
        body of a placeholder response (status 499) holding the error text.
        ``b''`` is returned if the body cannot be read.

    Raises:
        Whatever ``gen_jwt_token`` raises when the token cannot be built.
    """
    token = gen_jwt_token(bookid, page)
    url = f'{URL}/page/img/{bookid}/{page}?k={token}'
    headers = {
        'referer': f'{URL}/read/pdf/{bookid}',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'  # noqa
    }

    try:
        resp = SESS.get(url, headers=headers)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
        # Placeholder so callers can still inspect ``fetch_png.resp``.
        # ``httpx.Request`` is the public name; ``httpx.models`` is not
        # available in current httpx and used to crash here before any
        # request was attempted.
        resp = httpx.Response(
            status_code=499,
            request=httpx.Request('GET', URL),
            content=str(exc).encode(),
        )

    fetch_png.resp = resp

    try:
        return resp.content
    except Exception as exc:
        logger.warning(exc)
        return b''
def main():
    """Command-line entry point: fetch one page of a book and save it as PNG.

    Usage: ``python fetch_png.py [bookid [page]]``. Without arguments the
    test book 3208943, page 1, is used. The image is written to
    ``<bookid>-<page:03d>.png`` in the current directory; if that name is
    taken, suffixes ``-1`` .. ``-5`` are tried, and the last candidate is
    overwritten (with a warning) when all of them exist. On Windows the saved
    file is then opened with its associated application.
    """
    import os
    import sys

    args = sys.argv[1:]

    bookid = 3208943
    if args:
        bookid = args[0]
    else:
        logger.info(' Provide at least a bookid.')
        logger.info(' Using %s to test ' % bookid)

    page = 1
    if len(args) > 1:
        # The second argument is the page number. It used to be assigned to
        # ``bookid``, so the page was always 1 and the book id was clobbered.
        # It must be an int for the ``:03d`` filename format below.
        try:
            page = int(args[1])
        except ValueError as exc:
            logger.warning(exc)  # keep the default page
    else:
        logger.info(' Provide a page number.')
        logger.info(' Using %s to test ' % page)

    logger.info(f' Fetchiing {bookid} {bookinfo(bookid)} page: {page}')
    res = fetch_png(bookid, page)

    filename = f'{bookid}-{page:03d}.png'
    count = 0
    while Path(filename).exists():
        if count > 4:
            # Out of alternative names: this is the only case in which an
            # existing file really gets overwritten. The former while/else
            # warned in exactly the opposite situation.
            logger.warning(f' Possibly overwriting {filename}')
            break
        count += 1
        filename = f'{bookid}-{page:03d}-{count}.png'

    Path(filename).write_bytes(res)
    logger.info(f'{filename} saved.')

    if sys.platform in ['win32']:
        os.startfile(f'{filename}')  # type: ignore


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()