Strangely enough, there’s no regular expression class in the iPhone SDK.
Update: iOS 4 brings NSRegularExpression.
My simple wrapper around the regex.h C API is not safe for patterns or input containing Unicode (non-ASCII) characters, but it does the job, e.g. for parsing URLs. If you need more, have a look at RegexKitLite. The wrapper has the following interface:
// RegExp.h
//
// Created by Marcus Rohrmoser on 26.08.09.
// Copyright 2009 Marcus Rohrmoser. All rights reserved.
#import <Foundation/Foundation.h>
#import <regex.h>
#define REGEXP_NSERROR_DOMAIN @"regex.h"
#define NSRegExpPatternKey @"NSRegExpPatternKey"
/** Very simple Objective-C wrapper around the regex.h functions
(regcomp, regexec, regerror, regfree).
Patterns are compiled as extended regular expressions (REG_EXTENDED). Both the
pattern and the text to search are converted to ASCII C strings, so non-ASCII
input is not supported.
*/
@interface RegExp : NSObject
{
@private
// The compiled pattern. Allocated in +RegExpWithString:error: and released in -dealloc.
regex_t * c_regex;
}
/** Compile a RegExp.
@param pattern See regex.h / regcomp for details. ASCII-encodable characters only!
@param error in case of trouble: REGEXP_NSERROR_DOMAIN, error code from regcomp
and userinfo NSLocalizedDescriptionKey and NSRegExpPatternKey. May be nil if the
caller is not interested in the details.
@return the (autoreleased) regexp or nil in case of error. A nil pattern also
yields nil, without setting error.
*/
+(RegExp*)RegExpWithString:(NSString*)pattern error:(NSError**)error;
/**
Perform a match.
@param string text to search. ASCII-encodable characters only.
@return nil (no match, nil string, or string not ASCII-encodable) or an array with
the complete match at index 0 followed by one entry per group of the pattern.
Groups that did not take part in the match are represented by NSNull.
*/
-(NSArray*)match:(NSString*)string;
@end
with implementation
// RegExp.m
//
// Created by Marcus Rohrmoser on 26.08.09.
// Copyright 2009 Marcus Rohrmoser. All rights reserved.
#import "RegExp.h"
#define ENCODING NSASCIIStringEncoding
/* Currently uses stringWithCString - which is criticised at
 * http://lists.apple.com/archives/cocoa-dev/2004/Nov/msg01643.html, but why?
 * What's wrong with it?
 */
@implementation RegExp
#pragma mark Internal Helpers
/** Internal initializer: wraps an already compiled pattern.
 @param value_ pointer to the compiled regex_t. It is stored as-is (not copied);
 -dealloc later hands it to regfree() and free().
 @return the initialized receiver, or nil if [super init] failed.
 */
-(id)initWithParsedPattern:(regex_t*)value_
{
    self = [super init];
    if ( self == nil )
        return nil;

    c_regex = value_;
    return self;
}
/** Turn a regex.h error code into an NSError.
 @param regex the regex_t the failing call operated on; passed to regerror() so it
 can produce its message. Not freed here - that stays the caller's job.
 @param pattern the pattern text, stored in userInfo under NSRegExpPatternKey.
 @param code the error code returned by regcomp()/regexec().
 @return an autoreleased NSError in REGEXP_NSERROR_DOMAIN carrying code, plus
 NSRegExpPatternKey and NSLocalizedDescriptionKey in userInfo (each only when the
 corresponding value is available).
 */
+(NSError*)errorFromRegExpError:(regex_t*)regex pattern:(NSString*)pattern code:(NSInteger)code
{
    // Fixed-size stack buffer: no allocation that could fail and nothing to free.
    char errbuf[1000] = { 0 };
    regerror((int)code, regex, errbuf, sizeof(errbuf));

    NSString *msg = [NSString stringWithCString:errbuf encoding:NSASCIIStringEncoding];

    // Add the entries one by one. With the nil-terminated dictionaryWithObjectsAndKeys:
    // a nil pattern or message would silently cut the list short and drop the rest.
    NSMutableDictionary *info = [NSMutableDictionary dictionaryWithCapacity:2];
    if ( pattern != nil )
        [info setObject:pattern forKey:NSRegExpPatternKey];
    if ( msg != nil )
        [info setObject:msg forKey:NSLocalizedDescriptionKey];

    return [NSError errorWithDomain:REGEXP_NSERROR_DOMAIN
                               code:code
                           userInfo:[NSDictionary dictionaryWithDictionary:info]];
}
#pragma mark Public Interface
/** Compile a pattern into a RegExp (extended regular expression syntax).
 @param value_ the pattern. Must be representable in ASCII.
 @param error optional out-parameter, only written on failure. Receives an NSError
 in REGEXP_NSERROR_DOMAIN whose code is a regex.h error code and whose userInfo
 holds NSRegExpPatternKey and NSLocalizedDescriptionKey.
 @return an autoreleased RegExp, or nil on failure. A nil pattern returns nil
 without touching error.
 */
+(RegExp*)RegExpWithString:(NSString*)value_ error:(NSError**)error
{
    if(value_ == nil)
        return nil;

    const char * pat_c = [value_ cStringUsingEncoding:ENCODING];
    if(pat_c == NULL)
    {
        // The pattern cannot be converted losslessly; regcomp() must never see NULL.
        if(error != NULL)
            *error = [NSError errorWithDomain:REGEXP_NSERROR_DOMAIN
                                         code:REG_BADPAT
                                     userInfo:[NSDictionary dictionaryWithObjectsAndKeys:
                                               value_, NSRegExpPatternKey,
                                               @"pattern contains characters that are not ASCII encodable",
                                               NSLocalizedDescriptionKey, nil]];
        return nil;
    }

    // calloc zero-fills the struct, so it is in a defined state before regcomp runs.
    regex_t * regex = calloc(1, sizeof(regex_t));
    if(regex == NULL)
    {
        if(error != NULL)
            *error = [RegExp errorFromRegExpError:NULL pattern:value_ code:REG_ESPACE];
        return nil;
    }

    const int ec = regcomp(regex, pat_c, REG_EXTENDED);
    if(ec != 0)
    {
        // Build the error first: regerror() wants to look at the regex_t.
        if(error != NULL)
            *error = [RegExp errorFromRegExpError:regex pattern:value_ code:ec];
        regfree(regex);
        free(regex);
        return nil;
    }

    // Ownership of the heap-allocated regex_t passes to the new instance.
    return [[[RegExp alloc] initWithParsedPattern:regex] autorelease];
}
/** Search string for the first match of the compiled pattern.
 @param string the text to search. Must be representable in ASCII.
 @return nil if there is no compiled pattern, string is nil, string cannot be
 converted to ASCII, or nothing matches. Otherwise an array with 1 + re_nsub
 entries: the complete match at index 0, then one entry per group. Groups that
 did not take part in the match are NSNull.
 */
-(NSArray*)match:(NSString*)string
{
    // An instance created through the inherited -init has no compiled pattern.
    if(c_regex == NULL || string == nil)
        return nil;

    const char * c_str = [string cStringUsingEncoding:ENCODING];
    if(c_str == NULL)
        return nil;

    // Slot 0 is the whole match, followed by one slot per parenthesised group.
    const size_t count = 1 + c_regex->re_nsub;
    regmatch_t match[count];
    if(regexec(c_regex, c_str, count, match, 0) != 0)
        return nil;

    NSMutableArray *arr = [NSMutableArray arrayWithCapacity:count];
    for(size_t i = 0; i < count; i++)
    {
        const regmatch_t group = match[i];
        if(group.rm_so == -1 && group.rm_eo == -1)
        {
            // regexec marks a group that did not participate with -1/-1.
            [arr addObject:[NSNull null]];
            continue;
        }
        // Offsets are byte offsets into c_str.
        NSString *text = [[NSString alloc] initWithBytes:c_str + group.rm_so
                                                  length:(NSUInteger)(group.rm_eo - group.rm_so)
                                                encoding:ENCODING];
        [arr addObject:text];
        [text release];
    }
    return arr;
}
/** Release the compiled pattern together with the heap block that holds it. */
-(void)dealloc
{
    // c_regex is NULL when the instance was created through the inherited -init;
    // regfree() must not be handed a NULL pointer.
    if(c_regex != NULL)
    {
        regfree(c_regex);
        free(c_regex);
        c_regex = NULL;
    }
    [super dealloc];
}
@end
and an accompanying test case
// RegExpTC.m
//
// Created by Marcus Rohrmoser on 26.08.09.
// Copyright 2009 Marcus Rohrmoser. All rights reserved.
#include "TargetConditionals.h"
#if !TARGET_IPHONE_SIMULATOR
#warning TestCase ignored when not building for Simulator
#else
#import "../Classes/RegExp.h"
#import <SenTestingKit/SenTestingKit.h>
/** SenTestingKit test case for the RegExp wrapper. Also contains one test that
drives the raw regex.h API directly for comparison.
*/
@interface RegExpTC : SenTestCase
{
}
@end
@implementation RegExpTC
/** Extract the video id from both YouTube URL flavours ("/v/<id>" and "/watch?v=<id>"). */
-(void)testYouTube
{
    NSError *error = nil;
    // Backslashes are doubled so that the regex engine - not the C compiler - sees
    // "\." and "\?". A single "\?" is a C escape for a plain '?', which would turn
    // the literal question mark into a quantifier.
    RegExp *pat =
        [RegExp RegExpWithString:@"http.+\\.youtube\\.com/(v/|watch\\?v=)([A-Za-z0-9._%-]+)" error:&error];
    STAssertNil(error, @"pattern compile");
    STAssertNotNil(pat, @"pattern compile");

    // Long URL split into adjacent literals; the compiler joins them into one string.
    NSArray *match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de"
                                @"&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1"];
    STAssertNotNil(match, @"must match");
    // Cast so both sides have the same type; STAssertEquals rejects mismatched types.
    STAssertEquals((NSUInteger)3, match.count, @"complete match plus two subexpressions");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

    // /youtube.com/watch?v=([A-Za-z0-9._%-]*)[&w;=+_-]*/
    match = [pat match:@"http://www.youtube.com/watch?v=61wkfmWzLq4"];
    STAssertNotNil(match, @"must match");
    STAssertEquals((NSUInteger)3, match.count, @"complete match plus two subexpressions");
    STAssertEqualObjects(@"watch?v=", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

    match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&"];
    STAssertNotNil(match, @"must match");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
}
/** Happy path through the wrapper: nil handling, compile, and a match with one group. */
-(void)testCocoaOk
{
    NSError *error = nil;
    // A nil pattern yields nil and leaves error untouched.
    STAssertNil([RegExp RegExpWithString:nil error:&error], @"pattern nil");
    STAssertNil(error, @"pattern must compile ok");

    // "\\." passes a real backslash to the regex engine; '&' is an ordinary
    // character in an extended regular expression and needs no escape.
    RegExp *reg = [RegExp RegExpWithString:@"http.+\\.youtube\\.com/v/([^&]+)&.*"
                                     error:&error];
    STAssertNil(error, @"pattern must compile ok");

    // Long URL split into adjacent literals; the compiler joins them into one string.
    NSString *str = @"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1"
                    @"&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
    NSArray * match = [reg match:str];
    STAssertNotNil(match, @"must match");
    // Cast so both sides have the same type; STAssertEquals rejects mismatched types.
    STAssertEquals((NSUInteger)2, match.count, @"two matches expected");
    STAssertEqualObjects(str, [match objectAtIndex:0], @"complete match");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:1], @"first matching group");

    STAssertNil([reg match:nil], @"must not match");
}
/** An empty pattern must be rejected, with a fully populated NSError. */
-(void)testCocoaFail
{
    NSError *error = nil;
    RegExp *reg = [RegExp RegExpWithString:@"" error:&error];
    STAssertNil(reg, @"pattern must fail");
    STAssertNotNil(error, @"pattern must fail");
    STAssertEqualObjects(REGEXP_NSERROR_DOMAIN, error.domain, @"domain check");
    // REG_EMPTY is an int constant while -code returns NSInteger; cast so that
    // STAssertEquals compares two values of the same type.
    STAssertEquals((NSInteger)REG_EMPTY, error.code, @"code check");
    STAssertEqualObjects(@"empty (sub)expression", error.localizedDescription, @"description check");
    STAssertEqualObjects(@"", [error.userInfo valueForKey:NSRegExpPatternKey], @"pattern info");
}
/** Reference test that drives the regex.h C API directly, without the wrapper. */
-(void)testRaw
{
    regex_t regex;
    // "\\." passes a real backslash to the regex engine; '&' needs no escape.
    const char * pattern = "http.+\\.youtube\\.com/v/([^&]+)&.*";
    STAssertTrue(0 == regcomp(&regex, pattern, REG_EXTENDED), @"RegExp compile");

    regmatch_t * match = calloc(1 + regex.re_nsub, sizeof(regmatch_t));
    // Long URL split into adjacent literals; the compiler joins them into one string.
    const char * string = "http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0"
                          "&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
    STAssertTrue(0 == regexec(&regex, string, 1 + regex.re_nsub, match, 0), @"RegExp Match");
    STAssertTrue(1 == regex.re_nsub, @"Matching groups count");

    STAssertTrue(0 == match[0].rm_so, @"Match 0 start");
    // The pattern ends in ".*", so the complete match runs to the end of the input.
    STAssertTrue((regoff_t)strlen(string) == match[0].rm_eo, @"Match 0 end");
    // Group 1 is the 11-character video id directly after "http://www.youtube.com/v/".
    STAssertTrue(25 == match[1].rm_so, @"Match 1 start");
    STAssertTrue(36 == match[1].rm_eo, @"Match 1 end");

    free(match);
    regfree(&regex);
}
@end
#endif