标签 python 下的文章

要编辑 MP3 文件的标签(如标题、艺术家、专辑等),可以使用 Python 中的 mutagen 库。mutagen 是一个处理音频元数据的库,支持 MP3、FLAC、AAC 等多种格式。

安装 mutagen

如果你还没有安装这个库,可以通过以下命令来安装:

pip install mutagen

编辑 MP3 标签的示例代码

下面是一个编辑 MP3 标签的 Python 代码示例:

from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC

def edit_mp3_tags(file_path, title=None, artist=None, album=None, year=None):
    """Update common ID3 tags of an MP3 file and save the file in place.

    Args:
        file_path: Path of the MP3 file to modify.
        title: New value for the ``title`` key; skipped when falsy.
        artist: New value for the ``artist`` key; skipped when falsy.
        album: New value for the ``album`` key; skipped when falsy.
        year: New value for the ``date`` key; skipped when falsy.

    Raises:
        ValueError: If the file has no ID3 header and ``mutagen.File``
            cannot recognise its format (it returns ``None`` in that case).
    """
    # Imported locally: the file only does ``from mutagen... import ...``,
    # which never binds the bare name ``mutagen``. The previous
    # ``except mutagen.id3.ID3NoHeaderError`` therefore raised NameError
    # exactly when a file without tags was encountered.
    import mutagen
    from mutagen.id3 import ID3NoHeaderError

    try:
        audio = EasyID3(file_path)
    except ID3NoHeaderError:
        # No ID3 header yet: open the file generically and create empty tags.
        audio = mutagen.File(file_path, easy=True)
        if audio is None:
            raise ValueError(f"unsupported or unrecognised audio file: {file_path!r}")
        audio.add_tags()

    # Only overwrite the tags the caller actually supplied.
    requested = (
        ('title', title),
        ('artist', artist),
        ('album', album),
        ('date', year),
    )
    for key, value in requested:
        if value:
            audio[key] = value

    audio.save()

# Usage example: rewrite the title, artist, album and year of example.mp3.
file_path = 'example.mp3'
edit_mp3_tags(file_path, title="新歌名", artist="新艺术家", album="新专辑", year="2024")

解释:

  • EasyID3 是一个简化的接口,可以直接通过键(如 title、artist 等)访问常见的标签。
  • 如果 MP3 文件没有 ID3 标签,代码会为它创建一个新的标签。
  • 你可以通过 edit_mp3_tags 函数修改 MP3 文件的各种元数据。

你可以根据自己的需求修改这些代码来批量处理文件或设置其他类型的标签。

#!/usr/bin/python
# -*- coding: utf-8 -*-

import requests,time,re
from lxml import etree
from decimal import Decimal
from unicodedata import normalize
from multiprocessing import Pool,cpu_count

# Default HTTP headers sent with every request made through getHtml().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0',
    'Referer': 'https://www.vodtw.com/',
}

def getHtml(url,headers=headers,timeout=30):
    """Download *url* and return the raw response body as bytes.

    Args:
        url: Address to fetch.
        headers: HTTP headers to send; defaults to the module-level ``headers``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would leave a
            worker process stuck on a stalled connection.

    Returns:
        The undecoded response content.
    """
    r = requests.get(url, headers=headers, proxies=None, timeout=timeout)
    return r.content

def useXpath(html):
    """Parse *html* and return the ``href`` of every ``<a>`` under ``dl/dd``."""
    return etree.HTML(html).xpath("//dl/dd/a/@href")

def getContent(url):
    """Fetch one chapter and return its title, text and absolute URL.

    Args:
        url: Chapter path relative to ``https://www.vodtw.com/book/516/``.

    Returns:
        A dict with the keys ``"title"``, ``"content"`` and ``"url"``.
    """
    url = 'https://www.vodtw.com/book/516/' + url
    raw = getHtml(url)

    # The page only carries part of the chapter; the remainder is loaded by
    # an XHR request whose path is stored in the page's ``bufurl`` variable.
    matches = re.findall(r"bufurl='(.*?)'", raw.decode('utf-8'), re.MULTILINE)

    tree = etree.HTML(raw)
    paragraphs = tree.xpath("//*[@id='content']/text()")
    headings = tree.xpath("//h1/text()")

    # The chapter title is taken from the second <h1>. Fall back gracefully
    # instead of raising IndexError when the page has fewer headings.
    if len(headings) > 1:
        chapter_title = headings[1]
    elif headings:
        chapter_title = headings[0]
    else:
        chapter_title = url
    print(f"正在抓取 {url} {chapter_title}")

    # Joining with '\n' yields the same text as appending '\n' after every
    # item and then dropping the final newline, without quadratic rebuilding.
    content = '\n'.join(normalize("NFKD", item) for item in paragraphs)

    # Pages without a ``bufurl`` have no second part to download.
    if matches:
        next_url = 'https://www.vodtw.com' + matches[0]
        content += getHtml(next_url).decode('utf-8').replace('<br><br>', '\n')

    return {"title": chapter_title, "content": content, "url": url}

if __name__ == "__main__":
    # Scrape every chapter listed on the index page (skipping the first 18
    # links) with a process pool and append them, in order, to one text file.
    start = time.time()
    url = 'https://www.vodtw.com/book/516/'
    html = getHtml(url)
    mulu = useXpath(html)
    filename = '寒门枭士.txt'

    def _write_finished(pending):
        """Wait for the pending async results, append them to the file, and empty the list."""
        chapters = [chapter for chapter in (job.get() for job in pending) if chapter]
        pending.clear()
        if not chapters:
            return
        # Open the output once per batch rather than once per chapter.
        with open(filename, 'a', encoding='utf-8') as f:
            for chapter in chapters:
                f.write(chapter["title"] + '\n')
                f.write(chapter["content"] + '\n')

    pool = Pool(cpu_count())
    res_list = []
    for i, chapter_url in enumerate(mulu[18:], start=1):
        res_list.append(pool.apply_async(func=getContent, args=(chapter_url,)))

        if i % 10 == 0:
            # Write to the file once every 10 chapters, then pause briefly.
            _write_finished(res_list)
            time.sleep(3)

    # No more tasks will be submitted.
    pool.close()

    # Wait for all worker processes to finish.
    pool.join()

    # Collect the final partial batch. Previously these results were never
    # read back from res_list, so the last (count % 10) chapters were lost.
    _write_finished(res_list)

    minutes, seconds = divmod(time.time() - start, 60)
    print(f"抓取完毕,总共耗时:{Decimal(minutes).quantize(Decimal('0'))}分{Decimal(seconds).quantize(Decimal('0.00'))}秒")

简介

2019年雷佳音、易烊千玺主演的电视剧
《长安十二时辰》是由曹盾执导,雷佳音、易烊千玺领衔主演的古装悬疑剧。
该剧改编自马伯庸的同名小说,讲述了唐朝上元节前夕,长安城陷入危局,长安死囚张小敬临危受命,与李必携手在十二时辰内拯救长安的故事。
该剧于2019年6月27日在优酷视频播出

看原版小说

  • 链接 https://www.luoxia.com/shiershichen/
  • 代码

    # coding:utf-8
    
    # desc: 落霞小说网 爬小说
    # https://www.luoxia.com/shiershichen/
    
    import requests
    from lxml import etree
    
    # Session object used for every HTTP request made by getHtml() and getContent().
    s = requests.Session()
    # NOTE(review): presumably intended to disable HTTP keep-alive; verify that
    # requests.Session actually honours a ``keep_alive`` attribute — TODO confirm.
    s.keep_alive = False
    
    def getHtml(url):
      """Fetch the index page at *url*; return ``(hrefs, link_texts)`` of its list links."""
      request_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'}
      response = s.get(url, headers=request_headers)
      tree = etree.HTML(response.text)
      # Both queries target the same <a> elements: once for text, once for href.
      link_texts = tree.xpath("//div[@id='content-list']/div/ul/li/a/text()")
      hrefs = tree.xpath("//div[@id='content-list']/div/ul/li/a/@href")
      return hrefs, link_texts
    
    def getContent(url,filename):
      """Download the page at *url* and append each ``#nr1`` paragraph to *filename*."""
      request_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
        'referer': 'https://www.luoxia.com/shiershichen',
      }
      page = etree.HTML(s.get(url, headers=request_headers).text)
      paragraphs = page.xpath("//div[@id='nr1']/p/text()")
      # One line per paragraph, appended after whatever the file already holds.
      with open(filename, 'a', encoding='utf-8') as out:
          out.writelines(paragraph + '\n' for paragraph in paragraphs)
    
    if __name__ == "__main__":
      # Walk the table of contents; for each chapter write its name, then its text.
      url = "https://www.luoxia.com/shiershichen/"
      muluhtml = getHtml(url)
      filename ='长安十二时辰.txt'
      chapter_urls, chapter_names = muluhtml
      for idx, chapter_url in enumerate(chapter_urls):
          with open(filename,'a', encoding='utf-8') as f:
              f.write('\n' + chapter_names[idx] + '\n')
          getContent(chapter_url, filename)
          print("正在抓取 长安十二时辰 %s , 章节名是 %s " % (chapter_url, chapter_names[idx]))
  • 效果