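引用分组编号: m1=re.match(r'<([\w]+>)[\w]+</\1','<book>python</python>') (注意: 示例字符串的起止标签不一致, 反向引用 \1 要求结束标签与开始标签相同, 因此这里匹配失败, m1 为 None; 改成 '<book>python</book>' 才能匹配)
引用分组别名: m1=re.match(r'<(?P<mark>[\w]+>)[\w]+</(?P=mark)','<book>python</python>') (同上: (?P=mark) 要求结束标签与开始标签相同, 改成 '<book>python</book>' 才能匹配)
引用分组别名: m1=re.match(r'<(?P<mark>[\w]+>)[\w]+</(?P=mark)','<book>python</python>') (同上: (?P=mark) 要求结束标签与开始标签相同, 改成 '<book>python</book>' 才能匹配)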
2018-02-10
# Download every image URL stored in D and save the files as 1.jpg, 2.jpg, ...
# D and urllib.request must already be in scope (the page-scraping snippet
# in these notes builds D and imports urllib.request).
i = 1
for url in D:
    # The collected URLs start with "//", so the scheme is prepended here.
    # Download first: a failed request then leaves no empty file behind.
    with urllib.request.urlopen('https:' + url) as req:
        buf = req.read()
    # `with` closes the file even if the write raises.
    with open('E:\\Eclipse-py\\imooc\\src\\regular\\images\\' + str(i) + '.jpg', 'wb+') as f:
        f.write(buf)
    i += 1
# Download every image URL stored in D and save each one as <i>.jpg.
# NOTE(review): relies on D, urllib.request and the counter `i` already being
# defined; the full version of this snippet sets i = 1 before the loop.
for url in D:
    # The collected URLs start with "//", so the scheme is prepended here.
    # Download first: a failed request then leaves no empty file behind.
    with urllib.request.urlopen('https:' + url) as req:
        buf = req.read()
    # `with` closes the file even if the write raises.
    with open('E:\\Eclipse-py\\imooc\\src\\regular\\images\\' + str(i) + '.jpg', 'wb+') as f:
        f.write(buf)
    i += 1
2018-02-07
import re
import urllib.request

# Fetch the course-list page; the context manager closes the HTTP response.
with urllib.request.urlopen('https://www.imooc.com/course/list') as req:
    buf = req.read()
buf = buf.decode('utf-8')

# Non-greedy match: each hit runs from "//img" to the nearest ".jpg".
urlInfo = re.findall(r'//img.+?\.jpg', buf)

# Keying the dict by URL collapses repeated URLs into a single entry.
D = {url: url for url in urlInfo}
import re
import urllib.request

# Fetch the course-list page; the context manager closes the HTTP response.
with urllib.request.urlopen('https://www.imooc.com/course/list') as req:
    buf = req.read()
buf = buf.decode('utf-8')

# Non-greedy match: each hit runs from "//img" to the nearest ".jpg".
urlInfo = re.findall(r'//img.+?\.jpg', buf)

# Keying the dict by URL collapses repeated URLs into a single entry.
D = {url: url for url in urlInfo}
2018-02-07
# re.sub: replace every run of digits in str1 with the fixed text '101'.
info3 = re.compile(r'\d+').sub('101', str1)
print(info3)
def get_sub(m):
    """Return the matched number plus one, as a string.

    Used in these notes as the replacement callback passed to ``re.sub``.

    Args:
        m: Match object whose whole match (``m.group()``) is text that
            ``int()`` can parse.

    Returns:
        str: ``int(m.group()) + 1`` converted back to text.
    """
    return str(int(m.group()) + 1)
# re.sub with a callable: get_sub receives each match and returns its replacement.
info4 = re.compile(r'\d+').sub(get_sub, str1)
print(info4)
# re.split: cut the string wherever a ':', a space or a ',' occurs.
str2 = 'imooc:C++ C Java,Python'
info5 = re.compile(r':| |,').split(str2)
print(info5)
# re.sub: replace every run of digits in str1 with the fixed text '101'.
info3 = re.compile(r'\d+').sub('101', str1)
print(info3)
def get_sub(m):
    """Return the matched number plus one, as a string.

    Used in these notes as the replacement callback passed to ``re.sub``.

    Args:
        m: Match object whose whole match (``m.group()``) is text that
            ``int()`` can parse.

    Returns:
        str: ``int(m.group()) + 1`` converted back to text.
    """
    return str(int(m.group()) + 1)
# re.sub with a callable: get_sub receives each match and returns its replacement.
info4 = re.compile(r'\d+').sub(get_sub, str1)
print(info4)
# re.split: cut the string wherever a ':', a space or a ',' occurs.
str2 = 'imooc:C++ C Java,Python'
info5 = re.compile(r':| |,').split(str2)
print(info5)
2018-02-06
#coding:utf-8
import re

# One compiled pattern (a run of digits) shared by both lookups below.
_digits = re.compile(r'\d+')

str1 = 'Java=100 C++=90 python=80'

# search: returns the first match only.
info1 = _digits.search(str1)
print(info1.group())

# findall: returns every match as a list of strings.
info2 = _digits.findall(str1)
print(info2)

# Sum of all the numbers found.
print(sum(map(int, info2)))
import re

# One compiled pattern (a run of digits) shared by both lookups below.
_digits = re.compile(r'\d+')

str1 = 'Java=100 C++=90 python=80'

# search: returns the first match only.
info1 = _digits.search(str1)
print(info1.group())

# findall: returns every match as a list of strings.
info2 = _digits.findall(str1)
print(info2)

# Sum of all the numbers found.
print(sum(map(int, info2)))
2018-02-06
看评论里比较不错的例子1:
# Numbered backreferences: \1 \2 \3 must repeat what groups 1-3 captured,
# so 'abc' followed by 'abc' matches in full. Uses the print() function,
# as the other snippets in these notes do.
print(re.match(r'(a)(b)(c)\1\2\3', 'abcabc').group())
例子2:
# Named groups capture each opening tag name together with its '>';
# the (?P=name) backreferences then require the closing tags to repeat
# those captures in reverse order.
ma = re.match(
    r'<(?P<ht>[\w]+>)<(?P<h>[\w]+>)<(?P<s>[\w]+>).+</(?P=s)</(?P=h)</(?P=ht)',
    "<html><head><script>javascript:alert('hello world')</script></head></html>",
)
print(ma.group())
print(ma.groups())  # ('html>', 'head>', 'script>')
# Numbered backreferences: \1 \2 \3 must repeat what groups 1-3 captured,
# so 'abc' followed by 'abc' matches in full. Uses the print() function,
# as the other snippets in these notes do.
print(re.match(r'(a)(b)(c)\1\2\3', 'abcabc').group())
例子2:
# Named groups capture each opening tag name together with its '>';
# the (?P=name) backreferences then require the closing tags to repeat
# those captures in reverse order.
ma = re.match(
    r'<(?P<ht>[\w]+>)<(?P<h>[\w]+>)<(?P<s>[\w]+>).+</(?P=s)</(?P=h)</(?P=ht)',
    "<html><head><script>javascript:alert('hello world')</script></head></html>",
)
print(ma.group())
print(ma.groups())  # ('html>', 'head>', 'script>')
2018-02-06
变量正则表达式:[_a-zA-Z]+[_\w]*
163.com邮箱正则表达式:[\w]{6,20}@163\.com
163.com邮箱正则表达式:[\w]{6,20}@163\.com
2018-02-06
@吃鸟的菜 大赞
邮箱的正则表达式应该是[a-zA-Z0-9]{6,10}@163\.com。点前不加转义符\是不正确的,因为未转义的点会匹配任意一个字符。
邮箱的正则表达式应该是[a-zA-Z0-9]{6,10}@163\.com。点前不加转义符\是不正确的,因为未转义的点会匹配任意一个字符。
2018-02-06