How to use RegExp in Java?
HelloI try to create a class that uses regexp in Java.
The function of the class is to get substrings that matches a pattern.
For example if I got this string:
"Link to <A HREF=\"overview-summary.html\">Non-frame version.</A> </NOFRAMES> Link to <A HREF=\"index.html\">Non-frame version.</A> </NOFRAMES>"
I would like to get all <A HR...</A> like substrings...
<A HREF=\"overview-summary.html\">Non-frame version.</A>
<A HREF=\"index.html\">Non-frame version.</A>
In this case I would like to get 2 substrings since it contains 2 links.
But I can not manage to create the correct pattern to sreach for.
I have tried with: "<A.*</A>"
But then I ended up with one substring starting from the first <A HREF and ended with the last </A>
So if any one could help me out with this it would be great
Best regards
Fredrik
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class TextManager
{
public static int countAppearences(String in, String p)
{
Pattern pattern = Pattern.compile(p);
Matcher matcher = pattern.matcher(in);
int i = 0;
while(matcher.find())
{
i++;
}
return i;
}
public static String[] getSubStringsForPattern(String in, String p)
{
String[] subStrings = new String[countAppearences(in, p)];
int startIndex = 0;
for(int i = 0; i < subStrings.length; i++)
{
Object[] objects = getSubStringForPattern(in, p, startIndex);
subStrings[i] = (String)objects[0];
startIndex = ((Integer)objects[1]).intValue();
}
return subStrings;
}
public static Object[] getSubStringForPattern(String in, String p, int fromIndex)
{
Object[] objects = new Object[2];
Pattern pattern = Pattern.compile(p);
Matcher matcher = pattern.matcher(in);
matcher.find(fromIndex);
int startIndex = matcher.start();
String subString = matcher.group();
//System.out.println("Start: " + startIndex + " End: " + (startIndex + subString.length()));
objects[0] = subString;
objects[1] = new Integer(startIndex + subString.length());
return objects;
}
public static void main(String[] args)
{
String searchString = "Link to <A HREF=\"overview-summary.html\">Non-frame version.</A> </NOFRAMES> Link to <A HREF=\"index.html\">Non-frame version.</A> </NOFRAMES>";
int j = TextManager.countAppearences(searchString, "<A.*</A>");
System.out.println("Count: " + j);
String[] subStrings = TextManager.getSubStringsForPattern(searchString, "<A.*</A>");
for(int i = 0; i < subStrings.length; i++)
{
System.out.println(subStrings[i]);
}
}
}