拾月 发布的文章

安装
pip3 install pipenv

新建项目虚拟环境的步骤
1.创建项目目录,并进入(cmd)
2.项目初始化

pipenv --python 3

3.安装项目所需的包,会生成两个文件 Pipfile、Pipfile.lock

pipenv install flask flask_mysqldb
pipenv install requests==2.13.0 # 指定包的版本

4.进入虚拟环境的方式

pipenv shell

5.在虚拟环境运行命令

pipenv run xxx
eg:pipenv run pip list

6.查看虚拟环境相关信息,项目所绑定的虚拟目录

pipenv --venv

7.查看项目各种包的依赖关系

pipenv graph

8.安装开发环境测试包(不部署到生产环境)。注意:这些包会记录在 Pipfile 的 [dev-packages] 段中

pipenv install --dev requests

9.团队项目环境共享方法,直接分享 Pipfile 这个文件即可。然后:

pipenv install       #安装初始化项目依赖包,不包括开发环境的依赖包
pipenv install --dev #包括开发环境依赖包

10.删除虚拟环境

pipenv --rm

11.代码运行

pipenv run python main.py

12.运行脚本,脚本定义在pipfile中的[scripts]段,比如

[scripts]
start = "python main.py"
test = "pytest"
list = "pip list"

运行:pipenv run start  #这里的start,就是上面定义的。

13.从 requirements.txt 导入环境

pipenv install -r path/to/requirements.txt

14.生成 requirements.txt

pipenv lock -r     # 生成requirements.txt文件
pipenv lock -r -d  # 生成dev-packages的requirements.txt文件

15.常见命令介绍

pipenv install                  # 安装包
pipenv shell                  # 激活当前项目的虚拟环境
pipenv install pytest --dev   # 安装开发依赖包
pipenv graph                  # 图形显示包依赖关系
pipenv lock                      # 生成lockfile
pipenv uninstall --all          # 删除所有的安装包

16.bash的shell补全,添加下面语句到.bashrc或.bash_profile

eval "$(pipenv --completion)"

https://crazygit.wiseturtles.com/2018/01/08/pipenv-tour/

#!/usr/bin/python
# -*- coding: utf-8 -*-

import requests,time,re
from lxml import etree
from decimal import Decimal
from unicodedata import normalize
from multiprocessing import Pool,cpu_count

# Default HTTP request headers used by getHtml(): a desktop Firefox
# User-Agent plus a Referer pointing at the site's front page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0',
    'Referer': 'https://www.vodtw.com/',
}

def getHtml(url,headers=headers,timeout=30):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        headers: HTTP headers to send; defaults to the module-level
            ``headers`` dict.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout exception. requests applies no timeout unless one is
            given, so a stalled connection would otherwise block the calling
            worker indefinitely.

    Returns:
        The undecoded response body (``bytes``).
    """
    r = requests.get(url,headers=headers,proxies=None,timeout=timeout)
    return r.content

def useXpath(html):
    """Return the ``href`` of every ``<a>`` located under ``dl/dd`` in *html*.

    *html* is parsed with ``lxml.etree.HTML``; the result is whatever list the
    XPath query ``//dl/dd/a/@href`` yields.
    """
    tree = etree.HTML(html)
    return tree.xpath("//dl/dd/a/@href")

# Captures the path of the XHR endpoint that serves the rest of a chapter.
_BUFURL_RE = re.compile("bufurl='(.*?)'")


def getContent(url):
    """Download one chapter and return its title and complete text.

    A chapter page only embeds the first part of the text; the remainder is
    served by a second URL announced in the page as ``bufurl='...'``. Both
    parts are fetched and concatenated.

    Args:
        url: Chapter link relative to ``https://www.vodtw.com/book/516/``.

    Returns:
        A dict with the keys ``"title"``, ``"content"`` and ``"url"``, or
        ``None`` when no chapter title can be found on the page. The caller
        skips falsy results, so one odd page no longer aborts the whole crawl
        with an ``IndexError``.
    """
    url = 'https://www.vodtw.com/book/516/' + url
    html = getHtml(url)

    # Each chapter page shows only part of the text; the rest comes from an
    # asynchronous (XHR) request whose path is embedded in the page source.
    bufurl_match = _BUFURL_RE.search(html.decode('utf-8'))

    tree = etree.HTML(html)
    paragraphs = tree.xpath("//*[@id='content']/text()")
    titles = tree.xpath("//h1/text()")
    # The chapter title is read from the second <h1> text node; presumably
    # the first one is a site/book heading -- verify against the live page.
    if len(titles) < 2:
        print(f"跳过 {url} 未找到章节标题")
        return None
    title = titles[1]
    print(f"正在抓取 {url} {title}")

    # NFKD normalisation is applied to every text node before joining them
    # with newlines.
    content = '\n'.join(normalize("NFKD", item) for item in paragraphs)

    # Append the asynchronously served remainder when the page announces one;
    # a page without ``bufurl`` simply keeps its inline text.
    if bufurl_match:
        nextUrl = 'https://www.vodtw.com' + bufurl_match.group(1)
        content += getHtml(nextUrl).decode('utf-8').replace('<br><br>','\n')

    return {"title":title,"content":content,"url":url}

def _write_chapters(filename, results):
    """Wait for each pending result and append the finished chapters to *filename*.

    Falsy results (chapters that could not be scraped) are skipped. The file
    is opened once per batch rather than once per chapter.
    """
    chapters = [chapter for chapter in (res.get() for res in results) if chapter]
    if not chapters:
        return
    with open(filename,'a',encoding='utf-8') as f:
        for chapter in chapters:
            f.write(chapter["title"] + '\n')
            f.write(chapter["content"] + '\n')


if __name__ == "__main__":
    start = time.time()
    url = 'https://www.vodtw.com/book/516/'
    html = getHtml(url)
    mulu = useXpath(html)
    filename = '寒门枭士.txt'
    cpus = cpu_count()
    pool = Pool(cpus)
    res_list = []

    # The first 18 links of the index are skipped; presumably they are not
    # part of the ordered chapter list -- verify against the index page.
    for i, chapter_url in enumerate(mulu[18:], start=1):
        res_list.append(pool.apply_async(func=getContent, args=(chapter_url,)))

        if i % 10 == 0:
            # Flush to disk every 10 chapters, then pause before submitting
            # the next batch.
            _write_chapters(filename, res_list)
            res_list.clear()
            time.sleep(3)

    # No more work will be submitted; wait for the workers to finish.
    pool.close()
    pool.join()

    # Collect the chapters submitted after the last full batch of ten.
    # Previously these results were never retrieved, so up to nine trailing
    # chapters were silently missing from the output file.
    _write_chapters(filename, res_list)

    elapsed = time.time() - start
    print(f"抓取完毕,总共耗时:{Decimal(elapsed//60).quantize(Decimal('0'))}分{Decimal(elapsed%60).quantize(Decimal('0.00'))}秒")

简介

2019年雷佳音、易烊千玺主演的电视剧
《长安十二时辰》是由曹盾执导,雷佳音、易烊千玺领衔主演的古装悬疑剧。
该剧改编自马伯庸的同名小说,讲述了唐朝上元节前夕,长安城陷入危局,长安死囚张小敬临危受命,与李必携手在十二时辰内拯救长安的故事。
该剧于2019年6月27日在优酷视频播出

看原版小说

  • 链接 https://www.luoxia.com/shiershichen/
  • 代码

    # coding:utf-8
    
    # desc: 落霞小说网 爬小说
    # https://www.luoxia.com/shiershichen/
    
    import requests
    from lxml import etree
    
    # One shared session is used for every request made by this script.
    s = requests.Session()
    # requests never reads a ``keep_alive`` attribute on Session, so the
    # former ``s.keep_alive = False`` had no effect. Sending
    # ``Connection: close`` is what actually asks for the connection to be
    # closed after each response.
    s.headers['Connection'] = 'close'
    
    def getHtml(url):
      """Fetch the table-of-contents page at *url*.

      Returns a tuple ``(hrefs, link_texts)``: the ``href`` attributes and the
      text nodes of the ``<a>`` elements inside ``div#content-list``.
      """
      request_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'}
      response = s.get(url,headers = request_headers)
      tree = etree.HTML(response.text)
      link_texts = tree.xpath("//div[@id='content-list']/div/ul/li/a/text()")
      hrefs = tree.xpath("//div[@id='content-list']/div/ul/li/a/@href")
      return hrefs,link_texts
    
    def getContent(url,filename):
      """Download the chapter page at *url* and append its paragraphs to *filename*.

      Every text node matched by ``//div[@id='nr1']/p/text()`` is written on
      its own line; the file is opened in append mode with UTF-8 encoding.
      """
      request_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
        'referer': 'https://www.luoxia.com/shiershichen',
      }
      page = etree.HTML(s.get(url,headers = request_headers).text)
      paragraphs = page.xpath("//div[@id='nr1']/p/text()")
      with open(filename,'a', encoding='utf-8') as out:
          out.writelines(text + '\n' for text in paragraphs)
    
    if __name__ == "__main__":
      # Fetch the index, then for each chapter write its name as a heading
      # and let getContent() append the chapter body to the same file.
      url = "https://www.luoxia.com/shiershichen/"
      muluhtml = getHtml(url)
      filename ='长安十二时辰.txt'
      chapter_urls, chapter_names = muluhtml
      for idx, chapter_url in enumerate(chapter_urls):
          with open(filename,'a', encoding='utf-8') as f:
              f.write('\n' + chapter_names[idx] + '\n')
          getContent(chapter_url,filename)
          print("正在抓取 长安十二时辰 %s , 章节名是 %s " % (chapter_url,chapter_names[idx]))
  • 效果

需求

  • 把同个目录下的word文档都转换成pdf文档
  • 保存到同个目录下的同名文件

代码如下

  • Win10 + Office 2019 测试通过

    Sub Sava2PDF()
    ' Export every Word document chosen in a file-picker dialog to a PDF
    ' with the same base name, saved in the document's own folder.

    ' File-picker dialog
    Dim fd As FileDialog
    Set fd = Application.FileDialog(msoFileDialogFilePicker)

    Dim BaseFileName As String
    Dim DotPos As Long

    With fd
          ' .Show returns -1 when the user confirms the selection
          If .Show = -1 Then
              ' One selected file path per iteration
              Dim vrtSelectedItem As Variant

              For Each vrtSelectedItem In .SelectedItems

                  ' Open the Word file that is to be converted to PDF
                  Dim tempDC As Document
                  Set tempDC = Documents.Open(vrtSelectedItem)

                  ' Strip only the LAST extension. Splitting on "." and keeping
                  ' the first piece truncated names such as "report.v2.docx"
                  ' to "report", producing wrong or colliding PDF names.
                  DotPos = InStrRev(tempDC.Name, ".")
                  If DotPos > 1 Then
                      BaseFileName = Left(tempDC.Name, DotPos - 1)
                  Else
                      BaseFileName = tempDC.Name
                  End If

                  tempDC.ExportAsFixedFormat _
                      OutputFileName:=tempDC.Path & "\" & BaseFileName & ".pdf", _
                      ExportFormat:=wdExportFormatPDF, _
                      OpenAfterExport:=False, _
                      OptimizeFor:=wdExportOptimizeForPrint, _
                      Range:=wdExportAllDocument, From:=1, To:=1, _
                      Item:=wdExportDocumentContent, _
                      IncludeDocProps:=True, KeepIRM:=True, _
                      CreateBookmarks:=wdExportCreateNoBookmarks, _
                      DocStructureTags:=True, BitmapMissingFonts:=True, _
                      UseISO19005_1:=False

                  ' The source document is only read, so close it without
                  ' saving and without a save prompt interrupting the batch.
                  tempDC.Close SaveChanges:=wdDoNotSaveChanges

              Next vrtSelectedItem
          End If
      End With

      Set fd = Nothing

    End Sub

成果

ffmpeg 从视频中提取音频文件保存为MP3

ffmpeg -i apple.mp4 -f mp3 -vn apple.mp3

ffmpeg 从视频中提取音频文件,修改播放速度

ffmpeg -i apple.mp4 -filter:a "atempo=1.3" -f mp3 -vn apple.mp3

注意:

  • 倍率调整范围为[0.5, 2.0]
#coding:utf-8
#description: MP42mp3

import os,sys
import argparse
import subprocess

def convert(path):
    """Print one ffmpeg command line per ``.mp4`` file found directly in *path*.

    Each command extracts the audio track to an ``.mp3`` file next to the
    source video, sped up with the ``atempo=1.3`` audio filter. The commands
    are only printed, not executed.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        None.
    """
    ffmpeg_bin = "ffmpeg.exe"
    # Sorted so the printed command list is stable between runs.
    for name in sorted(os.listdir(path)):
        stem, ext = os.path.splitext(name)
        # Compare the real extension (case-insensitively) so names such as
        # "xmp4" are not picked up and "CLIP.MP4" is.
        if ext.lower() != ".mp4":
            continue
        # Include the directory so the command works from any working
        # directory, and replace only the final extension.
        src = os.path.join(path, name)
        dst = os.path.join(path, stem + ".mp3")
        cmdargs = '%s -i "%s" -filter:a "atempo=1.3" -f mp3 -vn "%s"' % (ffmpeg_bin, src, dst)
        print(cmdargs)

if __name__ =="__main__":
    # Expect the directory to scan as the first command-line argument. Exit
    # with a usage message instead of raising IndexError when it is missing,
    # or silently doing nothing when it is not a directory.
    if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
        sys.exit("usage: %s <directory-containing-mp4-files>" % sys.argv[0])
    path = sys.argv[1]
    convert(path)

参考链接:https://blog.csdn.net/matrix_laboratory/article/details/53158307