python功能命令收集
├── 系统相关操作
│   ├── 时间
│   ├── 检查使用的系统
│   ├── 获取文件相对路径、绝对路径
│   └── 获取当前路径地址
├── 文件操作
│   ├── 创建文件夹
│   ├── 删除文件夹
│   ├── 打开文件，读取方式
│   ├── 写入文件
│   ├── 下载文件
│   └── 获取文件内容行数
├── 数据处理
│   └── 列表
│       ├── 列表增加 
│       ├── 将文件每行内容读取成列表
│       ├── 将列表内容，以行写入字符串
│       ├── findall查找内容写入列表
│       ├── 列表循环读取
│       ├── 列表去重  
│       └── 列表进度条  
├── 检测网站是否可用
├── 多线程
├── 进度条
├── Linux系统
│   └── .sh 操作
└── github Actions workflows
    ├── 触发条件
    ├── 基本系统配置
    ├── 安装依赖
    ├── 安装工具
    └── 提交

系统相关操作

时间

# Build the dated output paths: <sub_path>/<year>/<month>/<month>-<day>.yaml
today = datetime.datetime.today()
path_year = '/'.join((sub_path, str(today.year)))
path_mon = '/'.join((path_year, str(today.month)))
path_yaml = '/'.join((path_mon, str(today.month) + '-' + str(today.day) + '.yaml'))

检查使用的系统

import platform
def checkenv():
    """Pick the clash executable that matches the host platform.

    Returns:
        A ``(clashname, operating_system)`` tuple: the path of the clash
        binary chosen for this platform and the full string reported by
        ``platform.platform()``.

    Raises:
        SystemExit: with status 1 when the platform string does not start
            with ``macOS``, ``Linux`` or ``Windows``.
    """
    # Local import: sys.exit is always available, whereas the bare exit()
    # helper is only injected by the site module.
    import sys

    operating_system = str(platform.platform())

    if operating_system.startswith('macOS'):
        # arm64 and non-arm64 Macs use different builds.
        if 'arm64' in operating_system:
            clashname = './clash-darwinarm64'
        else:
            clashname = './clash-darwinamd64'
    elif operating_system.startswith('Linux'):
        clashname = './clash-linuxamd64'
    elif operating_system.startswith('Windows'):
        clashname = 'clash-windowsamd64.exe'
    else:
        print('Unsupported Platform')
        sys.exit(1)
    print('Running on '+ operating_system)
    return clashname, operating_system

获取文件相对路径、绝对路径

import os

print(__file__)                     # path of the current file as stored in __file__ (may be relative)
print(os.path.dirname(__file__))    # directory part of that path
print(os.path.abspath(__file__))    # absolute path of the current file
print(os.path.dirname(os.path.abspath(__file__)))    # absolute path of the directory containing the current file

获取当前路径地址

from pathlib import Path  # Path包含在此处

    # Path of the Country.mmdb file under the current user's home directory
    home = str(Path.home())
    mmdbfl = '/'.join((home, '.config/clash/Country.mmdb'))

文件操作

创建文件夹

today = datetime.datetime.today()  # current local date and time
path_year = str(today.year)        # the year as a string, used as the directory name

# exist_ok=True turns the call into a no-op when the directory is already
# there, which replaces the separate os.path.exists() check and avoids the
# gap between checking and creating.
os.makedirs(path_year, exist_ok=True)
# NOTE(review): path_mon is not defined in this snippet; it is assumed to be
# set by surrounding code (e.g. path_year + '/' + month) - confirm.
os.makedirs(path_mon, exist_ok=True)

删除文件夹

1
2
3

import shutil

	shutil.rmtree('./temp')  # delete the ./temp directory together with everything inside it

打开文件，读取方式

# Write the numbers 0-9, one per line, to f.txt.
# Mode: 'r' read-only, 'w' write, 'a' append.
with open('f.txt', 'w') as f:  # the with block closes the file even if a write fails
    for i in range(0, 10):
        f.write(str(i) + '\n')

#下面是详细说明
#https://www.cnblogs.com/hls-code/p/15324191.html 	Python之文件的读取、创建、追加、删除、清空、替换
#https://www.zky.name/article/57.html				Python3 open() 函数详解 读取文件写入文件
r	以只读方式打开文件
rb	以二进制只读方式打开一个文件
r+	以读写方式打开一个文件
rb+	以二进制读写方式打开一个文件
w	以写入方式打开文件
wb	以二进制写入方式打开一个文件
w+	以读写方式打开一个文件
wb+	以二进制读写方式打开一个文件
a	以追加方式打开一个文件
ab	以二进制追加方式打开一个文件
a+	以读写追加方式打开一个文件
ab+	以二进制读写追加方式打开一个文件

写入文件

# 写入文件模块
def write_file(file, content):
    """Write ``content`` to the file at path ``file`` as UTF-8, replacing any existing content.

    Args:
        file: Path of the file to write.
        content: Text to write.
    """
    # Open the path that was passed in; the previous version opened the
    # unrelated name output_file and then rebound the `file` parameter.
    with open(file, 'w', encoding='utf-8') as handle:
        handle.write(content)

下载文件

from urllib import request          # Urllib是python内置的HTTP请求库,urllib.request获取URL的Python模块

geoip_url = 'https://raw.githubusercontent.com/Loyalsoldier/geoip/release/Country.mmdb'

    def geoip_update(url, save_path='./utils/Country.mmdb'):
        """Download the Country.mmdb file from ``url`` and store it at ``save_path``.

        Best-effort: every failure is reported on stdout and swallowed so the
        caller can carry on.

        Args:
            url: Download address of the database file.
            save_path: Where to save the download; defaults to
                ``./utils/Country.mmdb``.
        """
        print('Downloading Country.mmdb...')
        try:
            request.urlretrieve(url, save_path)
        except Exception as err:
            print('Downloading Country.mmdb Failed!\n')
            # Show why it failed instead of discarding the exception.
            print('Reason: ' + repr(err))
        else:
            print('Downloading Country.mmdb Success!\n')

if __name__ == '__main__':

    geoip_update(geoip_url)                           # 更新IP位置数据库'./utils/Country.mmdb'

获取文件内容行数

1
2
3

# Count the lines of `file` by iterating over it, so the whole file is never
# held in memory at once. The with block closes the file on exit, so no
# explicit close() is needed.
with open(file, 'r', encoding='utf-8') as f:
    total_amount = sum(1 for _ in f)

数据处理

列表

列表增加

output_list.append(proxy)  # add one element to the end of the list

将文件每行内容读取成列表

lines = re.split(r'\n+', urllist_content)  # split on runs of newlines: one list item per non-empty line

将列表内容，以行写入字符串

ownallProxy = '\n'.join(allProxy)  # join the list items into one string, one item per line

findall查找内容写入列表

import re
# Find every http(s) link in data and collect the matches in a list.
# Raw string so that the backslashes reach the regex engine unchanged.
url_list = re.findall(r"https?://[-A-Za-z0-9+&@#/%?=~_\|!:,.;]+[-A-Za-z0-9+&@#/%=~_\|]", data)

列表循环读取

# Try each entry of url in order and stop at the first one that answers 200.
# NOTE(review): the `return` shows this is an excerpt from a function body;
# `s` is presumably a requests session created by the caller - confirm.
for index in url:
    try:
        resp = s.get(index, timeout=2)                    # give up after 2 seconds
        status = resp.status_code                       # HTTP status code of the response
    except Exception:
        status = 404                                    # any failure is treated like a 404
    if status == 200:
        print('url host use =='+index+'\n')
        return index
    else:
        print('\n'+index+'  :url host is bad,please use new url!...\n')

列表去重

def list_rm(url_list):
    """Remove duplicates from ``url_list`` in place, keeping the first occurrence of each item.

    Args:
        url_list: List to deduplicate; it is modified in place.

    Returns:
        The same list object, now without duplicates and in original order.
    """
    print('\n-----去重开始-----\n')
    try:
        # dict keys are unique and keep insertion order.
        unique = list(dict.fromkeys(url_list))
    except TypeError:
        # Unhashable items (e.g. lists): fall back to equality comparison.
        unique = []
        for item in url_list:
            if item not in unique:
                unique.append(item)
    # Slice assignment keeps the caller's list object, as the original did.
    url_list[:] = unique
    print('\n-----去重结束-----\n')
    return url_list

列表进度条

#需要支持库安装tqdm
#requirements.txt
#tqdm == 4.64.0

from tqdm import tqdm   #进度条库

def urlListToSub(urllistfile):
    """Convert every URL listed in ``urllistfile`` and join the results line by line.

    Lines that do not contain 'http' are skipped. Progress is shown with a
    tqdm bar labelled "Fetch:".

    Args:
        urllistfile: Path of a UTF-8 text file with one URL per line.
    """
    # The with block closes the file even if read() raises.
    with open(urllistfile, 'r', encoding='utf-8') as file_urllist:
        urllist_content = file_urllist.read()

    lines = re.split(r'\n+', urllist_content)
    allProxy = []

    # tqdm wraps the list directly and takes the total from its length.
    for line in tqdm(lines, desc="Fetch:"):
        if 'http' in line:
            subContent = sub_convert.convert_remote(line, 'url', 'http://127.0.0.1:25500')
            allProxy.append(subContent)
    # NOTE(review): the joined string is built but neither returned nor
    # written in this snippet - confirm what should happen with it.
    ownallProxy = '\n'.join(allProxy)

检测网站是否可用

#单地址检测：
import requests
from requests.adapters import HTTPAdapter

def url_updated(url):
    """Report whether a GET request to ``url`` answers with HTTP status 200.

    Args:
        url: Address to probe.

    Returns:
        True when the response status is 200; False for any other status or
        when the request raises.
    """
    session = requests.Session()
    # Allow up to 2 retries for both plain and TLS connections.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, HTTPAdapter(max_retries=2))
    try:
        code = session.get(url, timeout=2).status_code  # 2 second timeout
    except Exception:
        code = 404  # any failure counts as "not available"
    return code == 200

#URL 列表检测
    # 检测URLhost是否可用
    # 要转换的地址必须是网络订阅才可以使用网络URLhost，本地订阅文件获取不到数据
    def use_urlhost(url=url_host):
        """Return the first entry of ``url`` that answers a GET request with HTTP 200.

        Args:
            url: Iterable of host URLs to probe in order; defaults to ``url_host``.

        Returns:
            The first URL that responds with status 200, or the string
            'bad url Host' when none does.
        """
        session = requests.Session()
        # Allow up to 2 retries for both plain and TLS connections.
        for scheme in ('http://', 'https://'):
            session.mount(scheme, HTTPAdapter(max_retries=2))

        for candidate in url:
            try:
                code = session.get(candidate, timeout=2).status_code  # 2 second timeout
            except Exception:
                code = 404  # any failure counts as "not available"
            if code != 200:
                print('\n'+candidate+'  :url host is bad,please use new url!...\n')
                continue
            print('url host use =='+candidate+'\n')
            return candidate

        print('oh,my god ,all url host are bad,sorry no work!...\n')
        return 'bad url Host'

多线程

import threading	#线程库https://www.runoob.com/python/python-multithreading.html

def sub_to_url(url,bar,allProxy):
    """Convert one subscription URL, append the result to ``allProxy`` and advance ``bar``.

    Args:
        url: One line of the URL list; only converted when it contains 'http'.
        bar: Progress object whose ``update(1)`` is called once per invocation.
        allProxy: List that receives the converted content.
    """
    is_link = 'http' in url
    if is_link:
        allProxy.append(
            sub_convert.convert_remote(url, 'url', 'http://127.0.0.1:25500')
        )
    # Count this line as processed whether or not it was a link.
    bar.update(1)


def urlListToSub(urllistfile):
    """Fetch every URL listed in ``urllistfile`` in parallel and save the merged result.

    One thread is started per line of the file; each thread runs
    ``sub_to_url``, which appends its result to a shared list and advances
    the progress bar. After all threads finish, the results are joined with
    newlines and written to ``outputUrlSub_path``.

    Args:
        urllistfile: Path of a UTF-8 text file with one URL per line.
    """
    # Read the URL list; the with block closes the file even on error.
    with open(urllistfile, 'r', encoding='utf-8') as file_urllist:
        urllist_content = file_urllist.read()

    # One list item per non-empty line.
    lines = re.split(r'\n+', urllist_content)
    allProxy = []

    # Report the number of entries.
    lenlines = len(lines)
    print('airport total == '+str(lenlines)+'\n')

    # NOTE: the previous version created threading.Semaphore(lenlines) but
    # never acquired it, so it limited nothing and has been removed. The
    # number of concurrent threads is therefore unbounded (one per line).

    bar = tqdm(total=lenlines, desc='订阅获取：')
    thread_list = []

    for line in lines:
        # daemon=True replaces the deprecated t.setDaemon(True).
        t = threading.Thread(target=sub_to_url, args=(line, bar, allProxy), daemon=True)
        thread_list.append(t)
        t.start()

    # Wait for every worker before using the collected results.
    for t in thread_list:
        t.join()
    bar.close()

    # Join the collected results, one per line.
    ownallProxy = '\n'.join(allProxy)

    # Write the merged subscription file.
    print('write miningUrl content!')
    with open(outputUrlSub_path, 'w', encoding='utf-8') as file:
        file.write(ownallProxy)

进度条

#需要支持库安装tqdm
#requirements.txt
#tqdm == 4.64.0

from tqdm import tqdm   #进度条库

    # Number of URLs to process
    lenlines = len(lines)

    # Progress bar lifecycle (indentation normalised to spaces)
    # create the bar with a known total
    bar = tqdm(total=lenlines, desc='订阅获取：')
    # advance the bar by one step
    bar.update(1)
    # finish the bar
    bar.close()
	
e.g.:

def urlListToSub(urllistfile):
    """Convert every URL listed in ``urllistfile`` and join the results line by line.

    Lines that do not contain 'http' are skipped. Progress is shown with a
    tqdm bar labelled "Fetch:".

    Args:
        urllistfile: Path of a UTF-8 text file with one URL per line.
    """
    # The with block closes the file even if read() raises.
    with open(urllistfile, 'r', encoding='utf-8') as file_urllist:
        urllist_content = file_urllist.read()

    lines = re.split(r'\n+', urllist_content)  # one list item per non-empty line
    allProxy = []
    # Wrapping the list in tqdm is the short form of the progress bar: the
    # total is taken from the list length and each iteration advances it.
    for line in tqdm(lines, desc="Fetch:"):
        if 'http' in line:
            subContent = sub_convert.convert_remote(line, 'url', 'http://127.0.0.1:25500')
            allProxy.append(subContent)
    # NOTE(review): the joined string is built but neither returned nor
    # written in this snippet - confirm what should happen with it.
    ownallProxy = '\n'.join(allProxy)  # list -> newline-separated string

Linux系统

.sh 操作

# Date parts used to build today's download URLs.
currentdate=$(date +%Y%m%d)
currentmonth=$(date +%Y%m)
currentmonths=$(date +%m)
currentyears=$(date +%Y)

subscribeclash="https://api.tsutsu.one/sub?target=clash&url=地址"
poolsubclash="https://api.tsutsu.one/sub?target=clash&url=地址"

# Work from the directory that contains this script; stop if that fails so
# the rm below never runs in an unexpected directory.
cd "$(dirname "$0")" || exit 1

echo delete old subscribes
# rm -f removes each file if it exists and is silent if it does not.
# The earlier `[ ! -f file ]` tests were inverted (rm only ran when the file
# was missing), and the .yaml check removed the .txt file by mistake.
rm -f \
  ./sub_v2rayshare.txt \
  ./sub_v2rayshare.yaml \
  ./allmysub.yaml \
  ./allmysub.txt \
  ./poolsub.yaml \
  ./poolsub.txt \
  ./openit.txt

echo get subscribe
wget "https://v2rayshare.com/wp-content/uploads/$currentyears/$currentmonths/$currentdate.txt"
wget "https://v2rayshare.com/wp-content/uploads/$currentyears/$currentmonths/$currentdate.yaml"

echo move subscribe
mv "$currentdate.txt" sub.txt
mv "$currentdate.yaml" sub.yaml

echo down subscribe
# Quoted so the URL is passed as a single argument.
wget "$subscribeclash" -O ./allmysub.yaml
wget "$poolsubclash" -O ./poolsub.yaml

echo hope you have a good day~
echo bye~

github Actions workflows

触发条件

# Workflow triggers
on:
  # manual run
  workflow_dispatch:
  
  # run after the "fetch proxy && check" workflow has completed
  workflow_run:
    workflows: ["fetch proxy && check"]
    types: [completed]
  schedule:
  # cron expression helper: https://crontab.guru/
  # minute 10 of hours 0, 6, 12 and 18, every day
    - cron: '10 0,6,12,18 * * *'

基本系统配置

# Basic runner setup
    runs-on: ubuntu-latest
    steps:
    # check out the repository
    - name: 迁出代码
      uses: actions/checkout@v2
    # install Python 3.x
    - name: 安装Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.x'
    # cache pip downloads, keyed on the runner OS and the hash of the requirements file
    - name: 加载缓存
      uses: actions/cache@v2
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('**/run_in_Actions/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    # set the runner time zone to Asia/Shanghai
    - name: 设置时区
      run: sudo timedatectl set-timezone 'Asia/Shanghai'

安装依赖

# Install the Python dependencies from both requirements files
- name: 安装依赖
  run: |
    pip install -r ./utils/requirements.txt
    pip install -r ./utils/checkclash/requirements.txt

安装工具

# Download subconverter, unpack it and start it in the background;
# its output goes to ./subconverter.log
- name: 安装订阅转换工具-subconverter
  run: |
    wget -O subconverter.tar.gz https://github.com/tindy2013/subconverter/releases/latest/download/subconverter_linux64.tar.gz
    tar -zxvf subconverter.tar.gz -C ./
    chmod +x ./subconverter/subconverter && nohup ./subconverter/subconverter >./subconverter.log 2>&1 &
		
# Download the pinned clash v1.11.4 binary, unpack it and start it in the background
- name: 安装测速工具-clash
  run: |
    wget -O clash-linuxamd64.gz https://github.com/Dreamacro/clash/releases/download/v1.11.4/clash-linux-amd64-v1.11.4.gz
    gunzip clash-linuxamd64.gz
    chmod +x ./clash-linuxamd64 && ./clash-linuxamd64 &

提交

# Commit ./sub and README.md with a timestamped message
# NOTE(review): git commit exits non-zero when there is nothing to commit,
# which would fail this step - confirm that is intended.
- name: 提交更改
  run: |                 
    git config --local user.email "actions@github.com"
    git config --local user.name "GitHub Actions"
    git pull origin main
    git add ./sub
    git add README.md
    git commit -m "$(date '+%Y-%m-%d %H:%M:%S') 节点获取成功 "
# Push the commit to the main branch
- name: 推送更改
  uses:  ad-m/github-push-action@master
  with:
    branch: main

Sweet RX's blog

学习笔记 - python功能命令收集

导航

系统相关操作

时间

检查使用的系统

获取文件相对路径、绝对路径

获取当前路径地址

文件操作

创建文件夹

删除文件夹

打开文件，读取方式

写入文件

下载文件

获取文件内容行数

数据处理

列表

列表增加

将文件每行内容读取成列表

将列表内容，以行写入字符串

findall查找内容写入列表

列表循环读取

列表去重

列表进度条

检测网站是否可用

多线程

进度条

Linux系统

.sh 操作

github Actions workflows

触发条件

基本系统配置

安装依赖

安装工具

提交