# 模块

定义：用来从逻辑上组织python代码（变量，函数，类，逻辑：实现一个功能），本质就是.py结尾的python文件（文件名：test.py，对应的模块名：test）

# 导入方法

import module_name
import module1_name,module2_name
from module_file import *
from module_file import module1_name,module2_name
from module_file import module_name as mn

1
2
3
4
5

# import本质（路径搜索和搜索路径）

导入模块的本质就是把python文件解释一遍；导入包的本质就是解释包下面的__init__.py文件。**包的本质：** 用来从逻辑上组织模块，本质就是一个目录（必须带有一个__init__.py文件）。如果要导入包下面的模块文件，就要修改__init__.py文件，在里面导入模块文件。

# 模块的分类

标准库
开源模块（第三方模块）
自定义模块。之前简单做过介绍，现在详细介绍一下：https://www.jianshu.com/p/239014d3aae9

# 标准库

### time、datetime模块

python中，通常有这几种方式表示时间：

时间戳
格式化的时间字符串
元组（struct_time）共九个元素

时间戳： 表示从1970年1月1日00:00开始按秒计算偏移量，运行‘type(time.time())’,返回的是float类型，返回时间戳方式的函数主要有time()

元组（struct_time）: struct_time元组共有9个元素，返回struct_time的函数主要有gmtime()、localtime()、strptime()

time

import time

# Seconds elapsed since the epoch, as a float timestamp.
epoch_seconds = time.time()
print(epoch_seconds)
# Sample output: 1565966794.21425

# Difference between UTC and local time, then between UTC and local
# daylight-saving time; both are expressed in seconds.
for utc_offset in (time.timezone, time.altzone):
    print(utc_offset)
# Sample output: -28800, then -32400

# The current moment as a struct_time: first in local time, then in UTC.
for to_struct in (time.localtime, time.gmtime):
    print(to_struct())
# Sample output:
#   time.struct_time(tm_year=2019, tm_mon=8, tm_mday=16, tm_hour=22, tm_min=53, tm_sec=29, tm_wday=4, tm_yday=228, tm_isdst=0)
#   time.struct_time(tm_year=2019, tm_mon=8, tm_mday=16, tm_hour=14, tm_min=58, tm_sec=3, tm_wday=4, tm_yday=228, tm_isdst=0)

# Human-readable form of the current time, built from a struct_time
# (asctime) and from a timestamp (ctime).
print(time.asctime())
print(time.ctime())
# Sample output (both): Fri Aug 16 23:00:14 2019

# Round trip: date string -> struct_time -> date string / timestamp.
stamp_format = '%Y/%m/%d %H:%M'
string_struct = time.strptime('2019/8/16 23:04', stamp_format)
print(string_struct)
# Sample output: time.struct_time(tm_year=2019, tm_mon=8, tm_mday=16, tm_hour=23, tm_min=4, tm_sec=0, tm_wday=4, tm_yday=228, tm_isdst=-1)
print(time.strftime(stamp_format, string_struct))
# Sample output: 2019/08/16 23:04
print(time.mktime(string_struct))
# Sample output: 1565967840.0

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

struct_time和时间戳<->时间字符串格式的转换：

格式化的时间字符串<->struct_time<->时间戳的相互转换

datetime

import datetime

# Offsets applied to the current time, in the order they are printed:
# +3 days, -3 days, +3 hours, +30 minutes.
shifts = (
    datetime.timedelta(days=3),
    datetime.timedelta(days=-3),
    datetime.timedelta(hours=3),
    datetime.timedelta(minutes=30),
)

print(datetime.datetime.now())  # current date and time
# Sample output: 2019-08-17 00:32:25.011494
for shift in shifts:
    print(datetime.datetime.now() + shift)
# Sample output:
#   2019-08-20 00:32:25.011494
#   2019-08-14 00:32:25.011494
#   2019-08-17 03:32:25.011494
#   2019-08-17 01:02:25.011494

# replace() returns a copy with the given fields swapped out: hour -> 2, minute -> 3.
c_time = datetime.datetime.now()
print(c_time.replace(hour=2, minute=3))
# Sample output: 2019-08-17 02:03:25.011494

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

# random模块

import random

print(random.random())  # random float between 0 and 1
# Sample output: 0.07306541518636855
print(random.randint(1, 3))  # integer from the closed interval [1, 3]
print(random.randrange(1, 3))  # integer from the half-open interval [1, 3)

# choice() picks one element from any sequence: a string, then a list.
for population in ('abcde', [1, 2, 3, 4, 5, 6]):
    print(random.choice(population))

print(random.sample('abcde', 2))  # two randomly chosen items
# Sample output: ['b', 'e']
print(random.uniform(1, 3))  # random float between 1 and 3
# Sample output: 2.1186016630230196

# shuffle() reorders the list in place.
a = [1, 2, 3, 4, 5, 6]
random.shuffle(a)
print(a)
# Sample output: [3, 1, 5, 6, 2, 4]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

生成随机验证码：

# Build a 6-character verification code. For each position a first draw
# decides between a digit and a letter; for letters a second draw decides
# between lower case and upper case.
code_parts = []
for position in range(6):
    first_draw = random.randrange(0, 6)
    if first_draw == position:
        symbol = random.randint(0, 9)  # digit 0-9
    else:
        second_draw = random.randrange(0, 6)
        if second_draw == first_draw:
            symbol = chr(random.randint(97, 122))  # 'a'-'z'
        else:
            symbol = chr(random.randint(65, 90))  # 'A'-'Z'
    code_parts.append(str(symbol))
checkcode = ''.join(code_parts)
print(checkcode)

1
2
3
4
5
6
7
8
9
10
11
12
13

# os模块

import os

# Tour of common os / os.path calls.
# NOTE(review): the paths are Windows examples (C: and D: drives) and several
# calls create, rename or delete real directories and files.
print(os.getcwd())  # current working directory, i.e. the directory the running script works in
os.chdir('C://')  # change the script's working directory; like `cd` in a shell
print(os.curdir)  # string for the current directory: ('.')
print(os.pardir)  # string for the parent of the current directory: ('..')
os.makedirs(r'd:\a\b\c')  # create nested directories recursively
os.removedirs(r'd:\a\b\c')  # remove the directory if it is empty, then move up one level and remove that too if empty, and so on
os.mkdir(r'd:a')  # create a single directory level; like `mkdir dirname` in a shell
os.rmdir(r'd:a')  # remove a single empty directory; fails with an error if it is not empty; like `rmdir dirname`
print(os.listdir(r'd:'))  # list all files and subdirectories of the given directory, hidden ones included
# Sample output: ['$360Section', '$RECYCLE.BIN', '360Downloads', '360Rec', '360安全浏览器下载', 'BaiduNetdiskDownload', 'computer software', 'Config.Msi']
os.remove(r'd:\a')  # delete a file
os.rename(r'd:\a',r'd:\b')  # rename a file or directory
print(os.stat(r'd:360安全浏览器下载'))  # metadata of a file or directory
# Sample output: os.stat_result(st_mode=16895, st_ino=5066549580791849, st_dev=485194, st_nlink=1, st_uid=0, st_gid=0, st_size=0, st_atime=1565493157, st_mtime=1565493157, st_ctime=1563296312)
print(os.environ)  # environment variables of the current system
print(os.name)  # string naming the platform in use: Windows -> 'nt', Linux -> 'posix'
os.system('dir')  # run a shell command; its output is displayed directly
print(os.path.abspath('./'))  # normalized absolute form of the path
print(os.path.split(r'd:a.txt'))  # split the path into a (directory, file name) pair
# Sample output: ('d:', 'a.txt')
print(os.path.dirname(os.path.abspath('./')))  # directory part of the path, i.e. the first element of os.path.split(path)
print(os.path.basename(os.path.abspath('./')))  # last component of the path, i.e. the second element of os.path.split(path); empty if the path ends with / or \
print(os.path.exists(os.path.abspath('./')))  # True if the path exists, False otherwise
print(os.path.isabs(os.path.abspath('./')))  # whether the path is absolute
print(os.path.isfile(os.path.abspath('./')))  # whether the path is a file
print(os.path.isdir(os.path.abspath('./')))  # whether the path is a directory
print(os.path.join(r'dir\haha',r'module',r'file'))  # join path components; components that precede an absolute component are discarded
# Sample output: dir\haha\module\file
import time
struct_time=time.localtime(os.path.getatime(os.path.abspath('./')))
print(time.strftime('%Y-%m-%d %H:%M:%S',struct_time))  # last access time of the path; getatime returns a timestamp, formatted here as a date string
# Sample output: 2019-08-18 15:53:13
print(os.path.getmtime(os.path.abspath('./')))  # last modification time of the path, as a timestamp
# Sample output: 1566114793.5133495

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

# Platform-specific constants, printed in this order:
#   os.sep     - path separator ("\\" on Windows, "/" on Linux)
#   os.linesep - line terminator ("\r\n" on Windows, "\n" on Linux)
#   os.pathsep - string used to separate file paths from one another
for platform_constant in (os.sep, os.linesep, os.pathsep):
    print(platform_constant)

1
2
3

具体实现如下图：

# sys模块

# Contents of the file sys_module.py
import sys
print(sys.argv)  # list of command-line arguments; the first element is the path of the program itself

#cmd命令运行 并带上参数
E:\test\pycharm\file\python develop\day5>python sys_module.py 1 2 3 4 5
['sys_module.py', '1', '2', '3', '4', '5']

1
2
3
4
5
6
7

print(sys.version)  # version information of the Python interpreter
# Sample output: 3.7.3 (v3.7.3:ef4ec6ed12, Mar 25 2019, 22:22:05) [MSC v.1916 64 bit (AMD64)]
print(sys.path)  # module search path; initialised from the PYTHONPATH environment variable
print(sys.platform)  # name of the operating-system platform
sys.exit(0)  # terminate the program; exit(0) signals a normal exit

1
2
3
4
5

# shutil模块

高级的文件、文件夹、压缩包处理模块

# copyfileobj将文件内容拷贝到另一个文件中，可以部分内容

import shutil  # the shutil snippets in this section rely on this import

# Copy the content of one open file object into another. The `with` block
# closes both handles when the copy finishes, so the data written to
# 'buweixia2' is flushed to disk before any later step reads that file.
with open('buweixia', encoding='utf-8') as f1, \
        open('buweixia2', 'w', encoding='utf-8') as f2:
    shutil.copyfileobj(f1, f2)

1
2
3

# copyfile拷贝文件，自动打开文件

shutil.copyfile('buweixia2','buweixia3')  # copy a file by path; shutil opens the files itself

# copystat拷贝状态的信息，包括：mode bits, atime, mtime, flags

# Print the stat result of the source, copy its status information onto the
# target with copystat, then print the target's stat result for comparison.
import os
print(os.stat('buweixia2'))
shutil.copystat('buweixia2','buweixia3')
print(os.stat('buweixia3'))
# Sample output:
# os.stat_result(st_mode=33206, st_ino=1688849860284406, st_dev=508939, st_nlink=1, st_uid=0, st_gid=0, st_size=1785, st_atime=1566120930, st_mtime=1566121662, st_ctime=1566120930)
# os.stat_result(st_mode=33206, st_ino=2251799813705719, st_dev=508939, st_nlink=1, st_uid=0, st_gid=0, st_size=1785, st_atime=1566120930, st_mtime=1566121662, st_ctime=1566121586)

1
2
3
4
5
6
7

# copy拷贝文件和权限

shutil.copy('buweixia3','buweixia4')  # copy the file together with its permissions

# copy2拷贝文件和状态信息

shutil.copy2('buweixia3','buweixia5')  # copy the file together with its status information

# copytree递归的去拷贝文件

shutil.copytree('./copy_file','./haha')  # recursively copy the ./copy_file directory to ./haha

# rmtree递归的去删除文件

shutil.rmtree('./copy_file')  # recursively delete the ./copy_file directory

# move递归的去移动文件,且可以重命名目录

shutil.move('./enen', './copy_file')  # recursively move ./enen to ./copy_file; this also renames the directory

# make_archive创建压缩包并返回文件路径，例如：zip、tar

base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，则保存至当前目录，否则保存至指定路径。如：www => 保存至当前路径；/Users/wupeiqi/www => 保存至/Users/wupeiqi/
format：压缩包种类，“zip”, “tar”, “bztar”，“gztar”
root_dir：要压缩的文件夹路径（默认当前目录）
owner：用户，默认当前用户
group：组，默认当前组
logger：用于记录日志，通常是logging.Logger对象

shutil.make_archive('./tar_file','tar')  # create a tar archive with base name ./tar_file; the returned archive path is not used here

# shelve模块

将数据写入文件：

import shelve
# Values to persist, keyed by the name under which each one is stored.
shelf_entries = {
    'test': (1, 2, 3, 4),
    'name': ['haha', 'enen', 'yoyo'],
    'info': {'age': 24, 'sex': 'm'},
}

# Open the shelf, store every entry under its key, then close the shelf.
f = shelve.open('./copy_file/buwexia')
for key, value in shelf_entries.items():
    f[key] = value
f.close()

1
2
3
4
5
6
7
8
9

读取文件内容：

import shelve
# Read back the stored values. Opening the shelf in a `with` block closes it
# once reading is done; the original snippet left it open.
with shelve.open('./copy_file/buwexia') as f:
    for key in ('test', 'name', 'info'):
        print(f.get(key))  # get() returns None when the key is absent

1
2
3
4
5

输出：

(1, 2, 3, 4)
['haha', 'enen', 'yoyo']
{'age': 24, 'sex': 'm'}

1
2
3

# xml模块

xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单

# Author:haha

import xml.etree.ElementTree as ET

# Parse the file 'xml_file', walk it, then modify it and write it back.
# Every tree.write('xml_file') call overwrites the same file that was parsed.
tree=ET.parse('xml_file')
root=tree.getroot()
print(root.tag)
# Sample output: data

# Walk the whole document: each child of the root, then that child's children
for child in root:
    print(child.tag,child.attrib)
    for i in child:
        print(i.tag,i.attrib,i.text)
# Sample output:
'''
country {'name': 'Liechtenstein'}
rank {'updated': 'yes'} 2
year {} 2008
gdppc {} 141100
neighbor {'name': 'Austria', 'direction': 'E'} None
neighbor {'name': 'Switzerland', 'direction': 'W'} None
country {'name': 'Singapore'}
rank {'updated': 'yes'} 5
year {} 2011
gdppc {} 59900
neighbor {'name': 'Malaysia', 'direction': 'N'} None
country {'name': 'Panama'}
rank {'updated': 'yes'} 69
year {} 2011
gdppc {} 13600
neighbor {'name': 'Costa Rica', 'direction': 'W'} None
neighbor {'name': 'Colombia', 'direction': 'E'} None
'''
for child in root.iter('year'):  # iterate only over elements with the given tag and show their text
    print(child.tag,child.text)
# Sample output:
# year 2008
# year 2011
# year 2011

# Modify and delete content of the xml document
for child in root.iter('year'):
    new_year=int(child.text)+2  # element text is a string, so convert before adding
    child.text=str(new_year)
    child.set('update','yes')  # add the attribute update="yes" to the element
tree.write('xml_file')

for child in root.iter('year'):
    print(child.tag,child.text)
# Sample output:
# year 2010
# year 2013
# year 2013

# Remove every country whose rank is greater than 5. findall() hands back a
# list, so removing elements from root inside the loop does not disturb it.
for country in root.findall('country'):
    rank=int(country.find('rank').text)
    if rank>5:
        root.remove(country)

tree.write('xml_file')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

自己创建xml文档:

import xml.etree.ElementTree as ET

# Root element of the new document.
new_xml = ET.Element("namelist")

# First <name> entry: text 'haha', an <age> child and an empty <sex> child.
name = ET.SubElement(new_xml, "name", enrolled="yes")
name.text = 'haha'
age = ET.SubElement(name, "age", checked="no")
age.text = '33'
sex = ET.SubElement(name, "sex")

# Second <name> entry: text 'enen' with a single <age> child.
name2 = ET.SubElement(new_xml, "name", enrolled="no")
name2.text = 'enen'
age = ET.SubElement(name2, "age")
age.text = '19'

# Wrap the root in a document object and save it with an XML declaration.
et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

# Print the generated markup.
ET.dump(new_xml)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

# configparser

# 生成config配置文件

import configparser

# Describe the whole configuration as nested dictionaries and load it in one
# call: the DEFAULT section first, then the two host sections.
config = configparser.ConfigParser()
config.read_dict({
    'DEFAULT': {
        'ServerAliveInterval': '45',
        'Compression': 'yes',
        'CompressionLevel': '9',
        'ForwardX11': 'yes',
    },
    'bitbucket.org': {
        'User': 'hg',
    },
    'topsecret.server.com': {
        'Host Port': '50022',
        'ForwardX11': 'no',
    },
})

# Save the configuration to example.ini.
with open('example.ini', 'w') as ini_file:
    config.write(ini_file)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

# 读取配置文件

import configparser

# Load example.ini and query it.
config = configparser.ConfigParser()
config.read('example.ini')
section_name = 'bitbucket.org'

print(config.sections())  # every section except DEFAULT
print(config[section_name]['user'])  # one value from the given section
print(config.defaults())  # the data stored under DEFAULT
print(section_name in config)  # whether the section exists in the config
print(config.options(section_name))  # keys of the section plus the keys under DEFAULT
# Sample output: ['user', 'serveraliveinterval', 'compression', 'compressionlevel', 'forwardx11']
print(config.items(section_name))  # key/value pairs of the section plus those under DEFAULT
# Sample output: [('serveraliveinterval', '45'), ('compression', 'yes'), ('compressionlevel', '9'), ('forwardx11', 'yes'), ('user', 'hg')]
print(config.get(section_name, 'user'))  # value of the given key in the given section
# Sample output: hg
print(config.getint('topsecret.server.com', 'host port'))  # same lookup, with the value converted to int
# Sample output: 50022
#输出：50022

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

# 修改

# Modify the parser loaded from example.ini, then write it back.
# `config` is the ConfigParser created in the "read the config file" snippet.
config.set('bitbucket.org', 'user', 'haha')  # set the value of a key in an existing section; the key is created if missing
print(config.has_option('bitbucket.org', 'user'))  # whether the section contains the key
# add_section() raises DuplicateSectionError for a section that already
# exists, so only add the section when it is missing.
if not config.has_section('bitbucket.org'):
    config.add_section('bitbucket.org')
print(config.has_section('bitbucket.org'))  # whether the section exists (has_section never creates it)
# Remove the option before the section: once the section is gone,
# remove_option() on it raises NoSectionError.
config.remove_option('bitbucket.org', 'user')  # delete one key from the section
config.remove_section('bitbucket.org')  # delete the whole section
# Overwrite the original file; the `with` block closes the handle afterwards.
with open('example.ini', 'w') as configfile:
    config.write(configfile)

1
2
3
4
5
6
7

# hashlib模块

用于加密相关的操作（严格来说是哈希/摘要算法，结果不可逆）。md5加密：

import hashlib

# Hash the data in two steps: each update() appends to what was already fed
# in, so the second digest covers "hello" followed by the second chunk.
md = hashlib.md5(b'hello')
print(md.hexdigest())
second_chunk = "It's me,哈哈".encode('utf-8')
md.update(second_chunk)
print(md.hexdigest())

# Verification: hashing the concatenated text in one go gives the same digest.
md2 = hashlib.md5("helloIt's me,哈哈".encode('utf-8'))
print(md2.hexdigest())
#输出：
'''
5d41402abc4b2a76b9719d911017c592
12c18ee8a7b5bfa3ccb0e98109f4b1ea
12c18ee8a7b5bfa3ccb0e98109f4b1ea
'''

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

其他加密模块类似，只是将md5改为相对应的加密方式，例：sha256

import hashlib

# Same two-step pattern with SHA-256: the second digest covers "hello"
# followed by the second chunk.
sh = hashlib.sha256(b'hello')
print(sh.hexdigest())
second_chunk = "It's me,哈哈".encode('utf-8')
sh.update(second_chunk)
print(sh.hexdigest())

# Verification: hashing the concatenated text in one go gives the same digest.
sh2 = hashlib.sha256("helloIt's me,哈哈".encode('utf-8'))
print(sh2.hexdigest())
#输出：
'''
2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
69bb59a9dd5d6b843423ddcbdf050aae0c0023d858dfe66a9b3edf3253791ec6
69bb59a9dd5d6b843423ddcbdf050aae0c0023d858dfe66a9b3edf3253791ec6
'''

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

hmac加密：更加高端的加密手段（使用密钥对消息做哈希，可用于消息认证）

import hmac

# hmac.new(key, msg, digestmod): the key and the message are two separate
# bytes arguments, and digestmod must be given explicitly on Python 3.8+.
# 'md5' keeps the digest algorithm that older versions used by default.
h = hmac.new(b'hello', "It's me,哈哈".encode(encoding='utf-8'), digestmod='md5')
print(h.hexdigest())
# Output: a 32-character hexadecimal HMAC-MD5 digest

1
2
3
4
5

# re模块

常用正则表达式符号

'.'     默认匹配除\n之外的任意一个字符，若指定flag DOTALL,则匹配任意字符，包括换行
'^'     匹配字符开头，若指定flags MULTILINE,这种也可以匹配上(r"^a","\nabc\neee",flags=re.MULTILINE)
'$'     匹配字符结尾，或re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()也可以
'*'     匹配*号前的字符0次或多次，re.findall("ab*","cabb3abcbbac")  结果为['abb', 'ab', 'a']
'+'     匹配前一个字符1次或多次，re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
'?'     匹配前一个字符1次或0次
'{m}'   匹配前一个字符m次
'{n,m}' 匹配前一个字符n到m次，re.findall("ab{1,3}","abb abc abbcbbb") 结果['abb', 'ab', 'abb']
'|'     匹配|左或|右的字符，re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
'(...)' 分组匹配，re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c
 
 
'\A'    只从字符开头匹配，re.search("\Aabc","alexabc") 是匹配不到的
'\Z'    匹配字符结尾，同$
'\d'    匹配数字0-9
'\D'    匹配非数字
'\w'    匹配[A-Za-z0-9_]
'\W'    匹配非[A-Za-z0-9_]
'\s'    匹配空白字符、\t、\n、\r , re.search("\s+","ab\tc1\n3").group() 结果 '\t'
 
'(?P<name>...)' 分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict() 结果{'province': '3714', 'city': '81', 'birthday': '1993'}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

re.match 从头开始匹配
re.search 匹配包含
re.findall 把所有匹配到的字符以列表的形式返回
re.split 以匹配到的字符当做列表分隔符
例：re.split('[0-9]','abc12f3GH'),结果为：['abc', '', 'f', 'GH']
re.sub      匹配字符并替换
例：re.sub('[0-9]+','|','abc12f3GH'),结果为：abc|f|GH
只替换一个数字：
re.sub('[0-9]+','|','abc12f3GH',count=1)，结果为：abc|f3GH

1
2
3
4
5
6
7
8
9

注：

[] 单字符取一个，比如[abc]会匹配a或b或c
但是，如果[]里面加上^则会变成排除这个字符，如[^abc]就表示不是a、不是b、也不是c 
另外，在[]里面可以使用-表示一个范围，如[0-9]表示从0到9，类似的还有[a-zA-Z]，如果要包含-字符，可以给它加上转义[\-]。 
关于[]常见的错误用法是：[ab|bc]用来表示ab或bc，实际上，它得到的结果是[abc|]，即a或b或c或|这4个字符（单字符）的任意一个。这里可以改成(ab|bc)。 
总结：[]里面的特殊符有五个：[]-\^，其他字符都是普通字符，包括*.?等。 
说明： 
* ^ 只有在[]内的首位时才有特殊意义（如[^abc]） 
* - 只有在[]内不处于首尾位置时才有特殊意义（如[0-9]） 
* \[ \] 因为与[]本身的字符冲突，所以需要转义 
* \本身是转义符，有特殊意义

1
2
3
4
5
6
7
8
9
10

反斜杠的困扰：

import re  # this snippet calls the re module directly

# Both arguments are raw strings: the pattern r'\\' matches one literal
# backslash, and r'abc\d' keeps its backslash without relying on '\d' being
# an unrecognised escape sequence in an ordinary string literal.
backslash_match = re.search(r'\\', r'abc\d')
print(backslash_match)
# Output: <re.Match object; span=(3, 4), match='\\'>

1
2

flags匹配模式：

re.I(re.IGNORECASE): 忽略大小写（括号内是完整写法，下同）
例：re.search('[a-z]+','abcAMK',flags=re.I)
输出：<re.Match object; span=(0, 6), match='abcAMK'>
re.M(re.MULTILINE): 多行模式，改变'^'和'$'的行为（参见上图）
例：re.search('^[0-9]+','abcb\n123',flags=re.M)
输出：<re.Match object; span=(5, 8), match='123'>
re.S(re.DOTALL): 点任意匹配模式，改变'.'的行为
例：re.search('.+','abcb\n123',flags=re.S)
输出：<re.Match object; span=(0, 8), match='abcb\n123'>

1
2
3
4
5
6
7
8
9

← 数据类型、函数、基本方法