#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Load bilibili history danmaku and return it as a list of JSON-serializable dicts.

import requests
import logging as log
import json
import time
from lxml import etree
from .headers import headers


def get_history_danmaku(oid, date):
    """Fetch the history danmaku of one video part for one day.

    Sends a GET request to bilibili's ``/x/v2/dm/history`` endpoint and
    converts every ``<d>`` element found in the response into a dict.

    Args:
        oid: cid / oid of the video part; must be accepted by ``int()``.
        date: Day to query, e.g. ``'2019-12-20'``. It is passed to the API
            unchanged and copied into every returned entry.

    Returns:
        A list with one dict per danmaku. Each dict has the keys ``cid``,
        ``time``, ``position``, ``fontsize``, ``color``, ``ctime``,
        ``unknown``, ``author``, ``dmid``, ``content``, ``date`` and
        ``updateTime``. Elements whose ``p`` attribute is missing or cannot
        be parsed are skipped with a warning. The list is empty when the
        response contains no ``<d>`` element.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    dm_list = []

    # get data
    url = 'https://api.bilibili.com/x/v2/dm/history'
    params = {'type': 1, 'oid': oid, 'date': date}
    # A timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get(url, params=params, headers=headers, timeout=30)
    content = r.content
    # errors='replace': a debug log line must never abort the fetch.
    log.debug(content.decode('utf-8', errors='replace'))

    # read xml
    xml = etree.HTML(content)
    if xml is None:
        # NOTE(review): lxml may hand back None for a body without any
        # parseable markup - treat that as "no danmaku".
        return dm_list

    nodes = xml.xpath('//d')
    if not nodes:
        return dm_list

    cid = int(oid)
    update_time = int(time.time())  # one timestamp for the whole fetch

    for node in nodes:
        p_values = node.xpath('./@p')
        if not p_values:
            log.warning('skip danmaku without "p" attribute')
            continue
        attrs = p_values[0].split(',')

        # A danmaku with empty content has no text node at all.
        texts = node.xpath('./text()')
        text = texts[0] if texts else ''
        log.debug(f'{attrs}, {text}')

        # format data
        try:
            entry = {
                'cid': cid,
                'time': int(float(attrs[0])),  # playback position when sent
                'position': int(attrs[1]),  # danmaku position / mode
                'fontsize': int(attrs[2]),  # font size
                # colour as the last six hex digits, zero-padded
                'color': f'{int(attrs[3]) & 0xFFFFFF:06x}',
                'ctime': int(attrs[4]),  # creation time of the danmaku
                'unknown': attrs[5],
                'author': attrs[6],  # sender identifier (not the uid)
                'dmid': int(attrs[7]),  # danmaku id
                'content': text,  # danmaku text
                'date': date,
                'updateTime': update_time,
            }
        except (IndexError, ValueError):
            # One malformed element should not discard the rest of the day.
            log.warning('skip malformed danmaku attributes: %s', attrs)
            continue
        dm_list.append(entry)

    return dm_list


if __name__ == '__main__':
    # Manual check: fetch one day of danmaku for a sample cid and
    # pretty-print the result with debug logging switched on.
    log.basicConfig(level=log.DEBUG)

    oid, date = 136870419, '2019-12-20'
    d = get_history_danmaku(oid, date)
    pretty = json.dumps(d, ensure_ascii=False, indent=4)
    print(pretty)

# ---- get_all_history_danmaku.py ----

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import logging as log
import os
import re
import json
from datetime import datetime, timedelta
from .get_danmaku import get_history_danmaku
from .headers import headers


def get_all_history_danmaku(aid):
    """Download all history danmaku of a video and save them as JSON.

    Fetches ``https://www.bilibili.com/video/av{aid}``, extracts the cids
    listed under ``"pages"`` and the publish date from the page source,
    then calls ``get_history_danmaku`` for every cid on every day from the
    publish date up to the current date. Entries are de-duplicated by
    ``dmid`` (the first occurrence wins) and written to ``av{aid}_dm.json``
    in the directory of this module.

    Args:
        aid: The av number of the video.

    Returns:
        None. The result is written to disk.

    Raises:
        IndexError: If the page list or the publish date cannot be found
            in the page source.
        requests.RequestException: If an HTTP request fails or times out.
    """
    url = f'https://www.bilibili.com/video/av{aid}'
    # Send the shared request headers imported by this module, as the danmaku
    # requests do (presumably User-Agent / cookie - verify against
    # headers.py). A timeout avoids hanging on a stalled connection.
    body = requests.get(url, headers=headers, timeout=30).text
    log.debug(body)

    # get cid / oid
    # NOTE: the non-greedy match stops at the first ']' after "pages":.
    pages_match = re.search(r'(?<="pages":)\[.*?\]', body)
    if pages_match is None:
        raise IndexError(f'no "pages" list found in the page of av{aid}')
    # \d+ (not \d*) so that a "cid": without digits never yields ''.
    cids = re.findall(r'(?<="cid":)\d+', pages_match.group())
    log.info(f'{cids=}')

    # get post date
    publish_match = re.search(
        r'(?<=Published" content=")\d{4}-\d{2}-\d{2}', body)
    if publish_match is None:
        raise IndexError(f'no publish date found in the page of av{aid}')
    publish = publish_match.group()
    start_date = datetime.strptime(publish, '%Y-%m-%d')
    log.info(f'{publish=}')

    result = {}
    while True:
        date = start_date.strftime('%Y-%m-%d')
        log.info(f'get danmaku of {date}')
        for cid in cids:
            for dm in get_history_danmaku(cid, date):
                # setdefault keeps the first copy and ignores repeats
                result.setdefault(dm['dmid'], dm)

        # go next day or exit
        start_date += timedelta(days=1)
        if start_date > datetime.now():
            break

    here = os.path.abspath(os.path.dirname(__file__))
    output = os.path.join(here, f'av{aid}_dm.json')
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    # Script entry point: dump every history danmaku of one sample video,
    # logging progress at INFO level.
    log.basicConfig(level=log.INFO)

    aid = 79974337
    get_all_history_danmaku(aid=aid)