清华大学免费开放“文泉学堂”,高质量书籍免费看
-
文泉学堂知识库收录了清华大学出版社近10年的正版电子课程和书籍。
https://lib-nuanxin.wqxuetang.com
目前没有很好的下载办法,每页都加密了。
可以采用截图的笨办法。
期待大神出脚本 -
'''
Fetch one page image of a book from Wenquan Xuetang (文泉学堂).

python 3.6, 3.7
pip install httpx loguru PyJWT

Example book: 3208943 ('Python+TensorFlow机器学习实战', '248')

Page 1 of the example book:
    python fetch_png.py

Page 10 of a given book:
    python fetch_png.py 3208943 10
'''
import json
import os
import sys
from pathlib import Path
from time import time

import httpx
import jwt
from loguru import logger

# Secret used to sign (HS256) the token sent in the page-image request.
JWT_SECRET = 'g0NnWdSE8qEjdMD8a1aq12qEYphwErKctvfd3IktWHWiOBpVsgkecur38aBRPn2w'
SESS = httpx.Client()
URL = 'https://lib-nuanxin.wqxuetang.com'

# Status code of the synthetic response built by ``_get`` when a request fails.
_FAILED_STATUS = 499

# Initial request to the site with the shared client, kept from the original
# script (presumably to pick up session cookies -- TODO confirm).
SESS.get(URL)


def _get(url, headers=None):
    ''' GET ``url`` with the shared client without ever raising.

    Any failure (transport error or an error status rejected by
    ``raise_for_status``) is logged and replaced by a synthetic response
    with status ``_FAILED_STATUS`` whose body is the error text.
    '''
    try:
        resp = SESS.get(url, headers=headers)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
        # Build the placeholder only when needed, through the public
        # httpx.Request name rather than the private httpx.models path.
        resp = httpx.Response(
            status_code=_FAILED_STATUS,
            request=httpx.Request('GET', URL),
            content=str(exc).encode(),
        )
    return resp


def _json_dict(resp):
    ''' Return the JSON body of ``resp`` as a dict, or {} if unusable. '''
    try:
        jdata = resp.json()
    except Exception as exc:
        logger.warning(exc)
        return {}
    # Callers use .get(); a non-dict payload is treated like a missing one.
    return jdata if isinstance(jdata, dict) else {}


def _pick_filename(bookid, page):
    ''' Return the first free name among ``{bookid}-{page:03d}.png`` and its
    ``-1`` .. ``-5`` variants; the ``-5`` variant is returned unchecked.
    '''
    filename = f'{bookid}-{page:03d}.png'
    count = 0
    while Path(filename).exists() and count < 5:
        count += 1
        filename = f'{bookid}-{page:03d}-{count}.png'
    return filename


def gen_jwt_key(bookid):
    ''' Fetch the jwt key for ``bookid``.

    Returns the ``data`` field of the JSON served at ``/v1/read/k``.

    Raises:
        Exception: if the request fails or the reply carries no ``data``.
    '''
    url = f'{URL}/v1/read/k?bid={bookid}'
    try:
        resp = SESS.get(url)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
        # Returning the error text here would get it signed into a bogus
        # token; fail loudly instead, like the missing-data path below.
        raise Exception(f'failed to fetch jwt key for {bookid}: {exc}') from exc

    res = _json_dict(resp).get('data')
    if res is None:
        raise Exception('returned None, something is not right...')
    return res


def gen_jwt_token(bookid, page=1):
    ''' Build the signed token (str) for ``page`` of ``bookid``.

    Raises:
        Exception: propagated from ``gen_jwt_key``.
    '''
    now = int(time())
    jwtkey = gen_jwt_key(bookid)
    payload = {
        "p": page,
        "t": now * 1000,  # same instant as "iat", in milliseconds
        "b": str(bookid),
        "w": 1000,
        "k": json.dumps(jwtkey),
        "iat": now,
    }
    token = jwt.encode(payload, JWT_SECRET, algorithm='HS256')
    # PyJWT < 2 returns bytes, PyJWT >= 2 returns str.
    if isinstance(token, bytes):
        token = token.decode('ascii')
    return token


def bookinfo(bookid):
    ''' Return ``(name, canreadpages)`` for ``bookid``.

    The full decoded JSON reply is kept on ``bookinfo.jdata``.

    Raises:
        Exception: if the reply carries no ``data`` (including when the
            request itself failed).
    '''
    url = f'{URL}/v1/read/initread?bid={bookid}'
    jdata = _json_dict(_get(url))

    data = jdata.get('data')
    if data is None:
        raise Exception('returned None, something is not right...')

    bookinfo.jdata = jdata
    return data.get('name'), data.get('canreadpages')


def fetch_png(bookid, page=1):
    ''' Download the image of ``page`` of ``bookid`` and return its bytes.

    The response object is kept on ``fetch_png.resp``. When the request
    fails, that response has status 499 and the returned bytes are the
    error text, not image data.

    Raises:
        Exception: propagated from ``gen_jwt_token``.
    '''
    token = gen_jwt_token(bookid, page)
    url = f'{URL}/page/img/{bookid}/{page}?k={token}'
    headers = {
        'referer': f'{URL}/read/pdf/{bookid}',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'  # noqa
    }

    resp = _get(url, headers=headers)
    fetch_png.resp = resp

    try:
        return resp.content
    except Exception as exc:
        logger.warning(exc)
        return b''


def main():
    ''' Command line entry: ``fetch_png.py [bookid [page]]``.

    Saves the page as ``{bookid}-{page:03d}.png`` in the current directory
    (with a ``-N`` suffix if that name is taken) and, on Windows, opens it.
    '''
    bookid = 3208943
    if not sys.argv[1:]:
        logger.info(' Provide at least a bookid.')
        logger.info(' Using %s to test ' % bookid)
    else:
        bookid = sys.argv[1]

    page = 1
    if not sys.argv[2:]:
        logger.info(' Provide a page number.')
        logger.info(' Using %s to test ' % page)
    else:
        # The second argument is the page, and it must be an int for the
        # ``{page:03d}`` file name below.
        try:
            page = int(sys.argv[2])
        except ValueError as exc:
            logger.warning(exc)
            logger.warning(f' Invalid page number, using {page} instead')

    logger.info(f' Fetchiing {bookid} {bookinfo(bookid)} page: {page}')

    res = fetch_png(bookid, page)
    if not res or fetch_png.resp.status_code == _FAILED_STATUS:
        # Do not save an error message (or nothing) as a .png file.
        logger.error(f' Failed to fetch {bookid} page {page}, nothing saved.')
        return

    filename = _pick_filename(bookid, page)
    if Path(filename).exists():
        # Every candidate name was taken; the last one gets replaced.
        logger.warning(f' Possibly overwriting {filename}')

    Path(filename).write_bytes(res)
    logger.info(f'{filename} saved.')

    if sys.platform in ['win32']:
        os.startfile(f'{filename}')  # type: ignore


if __name__ == '__main__':
    main()
1/5