清华大学免费开放“文泉学堂”,高质量书籍免费看

  • 🍌🍌

    文泉学堂知识库收录了清华大学出版社近10年的正版电子课程和书籍。

    https://lib-nuanxin.wqxuetang.com

    目前没有很好的下载办法,每页都加密了。
    可以采用截图的笨办法。
    期待大神出脚本。

  • 🍌🍌

    '''
    python 3.6, 3.7
     
    pip install httpx loguru PyJWT
     
    # Wenquan Xuetang (wqxuetang.com)
    # 3208943 ('Python+TensorFlow机器学习实战', '248'), page 1
    python fetch_png.py
     
    # page 10
    python fetch_png.py 3208943 10
     
    '''
     
    # from typing import Union, Tuple
    from pathlib import Path
    from time import time
    import json
    import httpx
    import jwt
    from loguru import logger
     
    # HS256 signing secret used by gen_jwt_token() for the page-image tokens.
    JWT_SECRET = 'g0NnWdSE8qEjdMD8a1aq12qEYphwErKctvfd3IktWHWiOBpVsgkecur38aBRPn2w'
    # One shared HTTP client used by every request helper in this script.
    SESS = httpx.Client()
    # Base URL of the reader site; every endpoint below is built from it.
    URL = 'https://lib-nuanxin.wqxuetang.com'
    # Requests the landing page once at import time (a network call).
    # Presumably this primes cookies on SESS for the later API calls -- TODO confirm.
    SESS.get(URL)
     
     
    # def gen_jwt_key(self):
    def gen_jwt_key(bookid):
        ''' Fetch the per-book key that gets embedded in the page-image JWT.

        Requests ``{URL}/v1/read/k?bid=<bookid>`` through the shared session
        and returns the ``data`` member of the JSON reply.

        NOTE: when the request itself fails (transport error or an error
        status), the exception is logged and its text is returned in place of
        the key; nothing is raised in that case.

        Raises:
            Exception: if the reply has no ``data`` member (this includes a
                body that cannot be parsed as JSON).
        '''
        endpoint = f'{URL}/v1/read/k?bid={bookid}'

        try:
            reply = SESS.get(endpoint)
            reply.raise_for_status()
        except Exception as err:
            logger.warning(err)
            return str(err)

        try:
            payload = reply.json()
        except Exception as err:
            # An unparsable body is treated like an empty reply.
            logger.warning(err)
            payload = {}

        key = payload.get('data')
        if key is not None:
            return key
        raise Exception('returned None, something is not right...')
     
     
    # def gen_jwt_token(self, page):
    def gen_jwt_token(bookid, page=1):
        ''' Build the signed token that authorises fetching one page image.

        Args:
            bookid: book identifier; stringified for the ``b`` claim.
            page: page number placed in the ``p`` claim.

        Returns:
            The HS256-signed token as a ``str``.

        Any exception raised by gen_jwt_key() propagates unchanged.
        '''
        cur_time = int(time())
        jwtkey = gen_jwt_key(bookid)
        payload = {
            "p": page,
            "t": cur_time * 1000,  # same instant as "iat", in milliseconds
            "b": str(bookid),
            "w": 1000,
            "k": json.dumps(jwtkey),
            "iat": cur_time,
        }
        token = jwt.encode(payload, JWT_SECRET, algorithm='HS256')
        # PyJWT 1.x returns bytes while 2.x returns str; calling .decode()
        # unconditionally raises AttributeError on 2.x.
        if isinstance(token, bytes):
            token = token.decode('ascii')
        return token
     
     
    # def bookinfo(self):
    def bookinfo(bookid):
        ''' Return ``(name, canreadpages)`` for a book.

        Requests ``{URL}/v1/read/initread?bid=<bookid>`` through the shared
        session. On success the full parsed reply is also kept on
        ``bookinfo.jdata`` for later inspection.

        Raises:
            Exception: if the request fails, the body is not valid JSON, or
                the reply has no ``data`` member.
        '''
        url = f'{URL}/v1/read/initread?bid={bookid}'

        # Any failure (transport, HTTP status, JSON parsing) is logged and
        # handled as an empty reply, which ends in the "returned None" error
        # below. This avoids building a placeholder response through
        # httpx.models, which is not part of httpx's public API.
        jdata = {}
        try:
            resp = SESS.get(url)
            resp.raise_for_status()
            jdata = resp.json()
        except Exception as exc:
            logger.warning(exc)

        # A JSON body that is not an object has no 'data' member either.
        data = jdata.get('data') if isinstance(jdata, dict) else None

        if data is None:
            raise Exception('returned None, something is not right...')

        bookinfo.jdata = jdata

        return data.get('name'), data.get('canreadpages')
     
     
    # async def download_img(self, page, task_id):
    def fetch_png(bookid, page=1):
        ''' Download the image of one page of a book.

        Args:
            bookid: book identifier.
            page: page number to fetch.

        Returns:
            The response body as bytes. If the request fails, the failure is
            logged and the bytes of the error message are returned instead of
            image data (best effort; nothing is raised for request errors).

        The response object (real or placeholder) is kept on
        ``fetch_png.resp`` for later inspection.
        '''
        token = gen_jwt_token(bookid, page)
        url = f'{URL}/page/img/{bookid}/{page}?k={token}'
        headers = {
            'referer': f'{URL}/read/pdf/{bookid}',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'  # noqa
        }

        try:
            resp = SESS.get(url, headers=headers)
            resp.raise_for_status()
        except Exception as exc:
            logger.warning(exc)
            # Placeholder response carrying the error text. It is built with
            # the top-level httpx.Request rather than httpx.models.Request,
            # because httpx.models is not part of httpx's public API.
            req = httpx.Request('GET', URL)
            resp = httpx.Response(status_code=499, request=req, content=str(exc).encode())  # noqa

        fetch_png.resp = resp

        try:
            res = resp.content
        except Exception as exc:
            logger.warning(exc)
            return b''

        return res
     
     
    def main():
        ''' Command-line entry point: fetch one page and save it as a PNG.

        Usage: ``fetch_png.py [bookid [page]]``. Without arguments, a test
        book id and page 1 are used. The image is written to
        ``<bookid>-<page>.png`` in the current directory; if that name is
        taken, suffixes ``-1`` .. ``-5`` are tried. On Windows the saved file
        is opened with the default viewer.
        '''
        import os
        import sys

        bookid = 3208943
        if not sys.argv[1:]:
            logger.info(' Provide at least a bookid.')
            logger.info(' Using %s to test ' % bookid)
        else:
            bookid = sys.argv[1]

        page = 1
        if not sys.argv[2:]:
            logger.info(' Provide a page number.')
            logger.info(' Using %s to test ' % page)
        else:
            # The second argument is the page number (the original assigned it
            # to bookid). It must be an int for the ':03d' format below.
            try:
                page = int(sys.argv[2])
            except ValueError as exc:
                # Not a number: log it and keep the default page.
                logger.warning(exc)
        logger.info(f' Fetchiing {bookid} {bookinfo(bookid)} page: {page}')

        res = fetch_png(bookid, page)

        filename = f'{bookid}-{page:03d}.png'

        # Try up to five numbered alternatives when the name is taken.
        count = 0
        while Path(filename).exists() and count < 5:
            count += 1
            filename = f'{bookid}-{page:03d}-{count}.png'

        # Warn only when the last candidate is still taken, i.e. when the
        # write below really does replace an existing file.
        if Path(filename).exists():
            logger.warning(f' Possibly overwriting {filename}')

        Path(filename).write_bytes(res)
        logger.info(f'{filename} saved.')

        if sys.platform in ['win32']:
            os.startfile(f'{filename}')  # type: ignore
     
     
    # Run the command-line entry point only when executed as a script,
    # not when the module is imported.
    if __name__ == '__main__':
        main()
    

  • @2nature 厉害了!

  • 🍌🍌

    @2nature 厉害了,高手

  • 🍌🍌

    @2nature 厉害👍

Copyright © 2019 - 2020 JIKE社区 版权所有