2 回答
TA贡献1829条经验 获得超7个赞
Java中正则表达式匹配的语法规则:
以下是整理出来的Java下运用正则表达式实现匹配的程序案例,代码如下:
package org.luosijin.test; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 正则表达式 * @version V5.0 * @author Admin * @date 2015-7-25 */ public class Regex { /** * @param args * @author Admin * @date 2015-7-25 */ public static void main(String[] args) { Pattern pattern = Pattern.compile( "b*g" ); Matcher matcher = pattern.matcher( "bbg" ); System.out.println(matcher.matches()); System.out.println(pattern.matches( "b*g" , "bbg" )); //验证邮政编码 System.out.println(pattern.matches( "[0-9]{6}" , "200038" )); System.out.println(pattern.matches( "//d{6}" , "200038" )); //验证电话号码 System.out.println(pattern.matches( "[0-9]{3,4}//-?[0-9]+" , "02178989799" )); getDate( "Nov 10,2009" ); charReplace(); //验证身份证:判断一个字符串是不是身份证号码,即是否是15或18位数字。 System.out.println(pattern.matches( "^//d{15}|//d{18}$" , "123456789009876" )); getString( "D:/dir1/test.txt" ); getChinese( "welcome to china,江西奉新,welcome,你!" ); validateEmail( "luosijin123@163.com" ); } /** * 日期提取:提取出月份来 * @param str * @author Admin * @date 2015-7-25 */ public static void getDate(String str){ String regEx= "([a-zA-Z]+)|//s+[0-9]{1,2},//s*[0-9]{4}" ; Pattern pattern = Pattern.compile(regEx); Matcher matcher = pattern.matcher(str); if (!matcher.find()){ System.out.println( "日期格式错误!" ); return ; } System.out.println(matcher.group( 1 )); //分组的索引值是从1开始的,所以取第一个分组的方法是m.group(1)而不是m.group(0)。 } /** * 字符替换:本实例为将一个字符串中所有包含一个或多个连续的“a”的地方都替换成“A”。 * * @author Admin * @date 2015-7-25 */ public static void charReplace(){ String regex = "a+" ; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher( "okaaaa LetmeAseeaaa aa booa" ); String s = matcher.replaceAll( "A" ); System.out.println(s); } /** * 字符串提取 * @param str * @author Admin * @date 2015-7-25 */ public static void getString(String str){ String regex = ".+/(.+)$" ; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(str); if (!matcher.find()){ System.out.println( "文件路径格式不正确!" ); return ; } System.out.println(matcher.group( 1 )); } /** * 中文提取 * @param str * @author Admin * @date 2015-7-25 */ public static void getChinese(String str){ String regex = "[//u4E00-//u9FFF]+" ;//[//u4E00-//u9FFF]为汉字 Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(str); StringBuffer sb = new StringBuffer(); while (matcher.find()){ sb.append(matcher.group()); } System.out.println(sb); } /** * 验证Email * @param email * @author Admin * @date 2015-7-25 */ public static void validateEmail(String email){ String regex = "[0-9a-zA-Z]+@[0-9a-zA-Z]+//.[0-9a-zA-Z]+" ; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(email); if (matcher.matches()){ System.out.println( "这是合法的Email" ); } else { System.out.println( "这是非法的Email" ); } } } |
TA贡献1998条经验 获得超6个赞
java中正则表达式的匹配其实是利用不确定的有穷自动机(NFA)结合向上追溯的算法来实现的。
以下是示例代码:
import java.util.* ;
/*
* An NFAState is a node with a set of outgoing edges to other
* NFAStates.
*
* There are two kinds of edges:
*
* (1) Empty edges allow the NFA to transition to that state without
* consuming a character of input.
*
* (2) Character-labelled edges allow the NFA to transition to that
* state only by consuming the character on the label.
*
*/
class NFAState
{
/*
* WARNING:
*
* The maximum integer character code we'll match is 255, which
* is sufficient for the ASCII character set.
*
* If we were to use this on the Unicode character set, we'd get
* an array index out-of-bounds exception.
*
* A ``proper'' implementation of this would not use arrays but
* rather a dynamic data structure like Vector.
*/
public static final int MAX_CHAR = 255 ;
public boolean isFinal = false ;
private ArrayList<NFAState> onChar[] = new ArrayList[MAX_CHAR] ;
private ArrayList<NFAState> onEmpty = new ArrayList() ;
/*
* Add a transition edge from this state to next which consumes
* the character c.
*/
public void addCharEdge(char c, NFAState next) {
onChar[(int)c].add(next) ;
}
/*
* Add a transition edge from this state to next that does not
* consume a character.
*/
public void addEmptyEdge(NFAState next) {
onEmpty.add(next) ;
}
public NFAState () {
for (int i = 0; i < onChar.length; i++)
onChar[i] = new ArrayList() ;
}
public boolean matches(String s) {
return matches(s,new ArrayList()) ;
}
private boolean matches(String s, ArrayList visited) {
/*
* When matching, we work character by character.
*
* If we're out of characters in the string, we'll check to
* see if this state if final, or if we can get to a final
* state from here through empty edges.
*
* If we're not out of characters, we'll try to consume a
* character and then match what's left of the string.
*
* If that fails, we'll ask if empty-edge neighbors can match
* the entire string.
*
* If that fails, the match fails.
*
* Note: Because we could have a circular loop of empty
* transitions, we'll have to keep track of the states we
* visited through empty transitions so we don't end up
* looping forever.
*/
if (visited.contains(this))
/* We've found a path back to ourself through empty edges;
* stop or we'll go into an infinite loop. */
return false ;
/* In case we make an empty transition, we need to add this
* state to the visited list. */
visited.add(this) ;
if (s.length() == 0) {
/* The string is empty, so we match this string only if
* this state is a final state, or we can reach a final
* state without consuming any input. */
if (isFinal)
return true ;
/* Since this state is not final, we'll ask if any
* neighboring states that we can reach on empty edges can
* match the empty string. */
for (NFAState next : onEmpty) {
if (next.matches("",visited))
return true ;
}
return false ;
} else {
/* In this case, the string is not empty, so we'll pull
* the first character off and check to see if our
* neighbors for that character can match the remainder of
* the string. */
int c = (int)s.charAt(0) ;
for (NFAState next : onChar[c]) {
if (next.matches(s.substring(1)))
return true ;
}
/* It looks like we weren't able to match the string by
* consuming a character, so we'll ask our
* empty-transition neighbors if they can match the entire
* string. */
for (NFAState next : onEmpty) {
if (next.matches(s,visited))
return true ;
}
return false ;
}
}
}
/*
* Here, an NFA is represented by an entry state and an exit state.
*
* Any NFA can be represented by an NFA with a single exit state by
* creating a special exit state, and then adding empty transitions
* from all final states to the special one.
*
*/
public class NFA
{
public NFAState entry ;
public NFAState exit ;
public NFA(NFAState entry, NFAState exit) {
this.entry = entry ;
this.exit = exit;
}
public boolean matches(String str) {
return entry.matches(str);
}
/*
* c() : Creates an NFA which just matches the character `c'.
*/
public static final NFA c(char c) {
NFAState entry = new NFAState() ;
NFAState exit = new NFAState() ;
exit.isFinal = true ;
entry.addCharEdge(c,exit) ;
return new NFA(entry,exit) ;
}
/*
* e() : Creates an NFA which matches the empty string.
*/
public static final NFA e() {
NFAState entry = new NFAState() ;
NFAState exit = new NFAState() ;
entry.addEmptyEdge(exit) ;
exit.isFinal = true ;
return new NFA(entry,exit) ;
}
/*
* rep() : Creates an NFA which matches zero or more repetitions
* of the given NFA.
*/
public static final NFA rep(NFA nfa) {
nfa.exit.addEmptyEdge(nfa.entry) ;
nfa.entry.addEmptyEdge(nfa.exit) ;
return nfa ;
}
/*
* s() : Creates an NFA that matches a sequence of the two
* provided NFAs.
*/
public static final NFA s(NFA first, NFA second) {
first.exit.isFinal = false ;
second.exit.isFinal = true ;
first.exit.addEmptyEdge(second.entry) ;
return new NFA(first.entry,second.exit) ;
}
/*
* or() : Creates an NFA that matches either provided NFA.
*/
public static final NFA or(NFA choice1, NFA choice2) {
choice1.exit.isFinal = false ;
choice2.exit.isFinal = false ;
NFAState entry = new NFAState() ;
NFAState exit = new NFAState() ;
exit.isFinal = true ;
entry.addEmptyEdge(choice1.entry) ;
entry.addEmptyEdge(choice2.entry) ;
choice1.exit.addEmptyEdge(exit) ;
choice2.exit.addEmptyEdge(exit) ;
return new NFA(entry,exit) ;
}
/* Syntactic sugar. */
public static final NFA re(Object o) {
if (o instanceof NFA)
return (NFA)o ;
else if (o instanceof Character)
return c((Character)o) ;
else if (o instanceof String)
return fromString((String)o) ;
else {
throw new RuntimeException("bad regexp") ;
}
}
public static final NFA or(Object... rexps) {
NFA exp = rexps[0] ;
for (int i = 1; i < rexps.length; i++) {
exp = or(exp,re(rexps[i])) ;
}
return exp ;
}
public static final NFA s(Object... rexps) {
NFA exp = e() ;
for (int i = 0; i < rexps.length; i++) {
exp = s(exp,re(rexps[i])) ;
}
return exp ;
}
public static final NFA fromString(String str) {
if (str.length() == 0)
return e() ;
else
return s(re(str.charAt(0)),fromString(str.substring(1))) ;
}
public static void main(String[] args) {
NFA pat = s(rep(or("foo","bar")),"") ;
String[] strings =
{ "foo" , "bar" ,
"foobar", "farboo", "boofar" , "barfoo" ,
"foofoobarfooX" ,
"foofoobarfoo" ,
} ;
for (String s : strings) {
System.out.println(s + "\t:\t" +pat.matches(s)) ;
}
}
}
添加回答
举报