Strangely enough, there’s no regular expression class in the iPhone SDK.
Update: iOS 4 brings NSRegularExpression.
My simple wrapper around the regex.h C API only handles ASCII-encodable patterns and input, so it is not safe for Unicode text, but it does the job, e.g. for parsing URLs. If you need more, have a look at RegexKitLite. The wrapper has the following interface:
//  RegExp.h
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#import <Foundation/Foundation.h>
#import <regex.h>

/** NSError domain of every error reported by RegExp. */
#define REGEXP_NSERROR_DOMAIN @"regex.h"
/** NSError userInfo key under which the offending pattern string is stored. */
#define NSRegExpPatternKey @"NSRegExpPatternKey"

/**
 Very simple Objective-C wrapper around the regex.h functions.

 Patterns and input strings are converted to C strings using ASCII encoding,
 so only ASCII-encodable text is supported.
 */
@interface RegExp : NSObject {
@private
    /* Compiled pattern handed to the instance by the factory method. */
    regex_t *c_regex;
}

/**
 Compile a RegExp.

 @param pattern See regex.h / regcomp for details. ASCII-encodable characters only!
 @param error in case of trouble: REGEXP_NSERROR_DOMAIN, error code from regcomp and
        userInfo NSLocalizedDescriptionKey and NSRegExpPatternKey. May be NULL.
 @return the regexp or nil in case of error. A nil pattern yields nil without an error.
 */
+(RegExp*)RegExpWithString:(NSString*)pattern error:(NSError**)error;

/**
 Perform a match.

 @param string text to search.
 @return nil (no match, nil input or input that is not ASCII-encodable) or an array
         holding the complete match at index 0 followed by one entry per group;
         groups that did not take part in the match are represented by NSNull.
 */
-(NSArray*)match:(NSString*)string;

@end
with implementation
//  RegExp.m
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#import "RegExp.h"

#define ENCODING NSASCIIStringEncoding

/*
 Currently uses stringWithCString - which is bashed at
 http://lists.apple.com/archives/cocoa-dev/2004/Nov/msg01643.html, but why?
 What's wrong with it?
 */

@implementation RegExp

#pragma mark Internal Helpers

/**
 Initializer used by the factory method.

 @param value_ heap-allocated, successfully compiled pattern. The instance takes
        ownership and releases it in -dealloc.
 */
-(id)initWithParsedPattern:(regex_t*)value_
{
    if ( (self = [super init]) )
        c_regex = value_;
    return self;
}


/**
 Build an NSError in REGEXP_NSERROR_DOMAIN for a regex.h error code.

 @param regex the regex_t handed to the failing call, or NULL when no such object exists.
 @param pattern the pattern string, stored under NSRegExpPatternKey (skipped when nil).
 @param code error code as returned by regcomp / regexec.
 @return autoreleased error whose localized description is the regerror() message.
 */
+(NSError*)errorFromRegExpError:(regex_t*)regex pattern:(NSString*)pattern code:(NSInteger)code
{
    // Fixed-size stack buffer: no allocation that could fail while reporting an error.
    char errbuf[1000] = { 0 };
    regerror((int)code, regex, errbuf, sizeof(errbuf));
    NSString *msg = [NSString stringWithCString:errbuf encoding:NSASCIIStringEncoding];

    // Add the entries one by one so that a nil value cannot silently cut the list short.
    NSMutableDictionary *info = [NSMutableDictionary dictionaryWithCapacity:2];
    if (pattern != nil)
        [info setObject:pattern forKey:NSRegExpPatternKey];
    if (msg != nil)
        [info setObject:msg forKey:NSLocalizedDescriptionKey];

    return [NSError errorWithDomain:REGEXP_NSERROR_DOMAIN code:code userInfo:info];
}


#pragma mark Public Interface

/**
 Compile a pattern with REG_EXTENDED.

 @param value_ the pattern; nil yields nil without setting an error.
 @param error optional out-parameter, only written on failure.
 @return autoreleased RegExp or nil on failure.
 */
+(RegExp*)RegExpWithString:(NSString*)value_ error:(NSError**)error
{
    if (value_ == nil)
        return nil;

    // cStringUsingEncoding: returns NULL when the pattern is not ASCII-encodable;
    // handing NULL to regcomp would crash, so report it as a bad pattern instead.
    const char *pat_c = [value_ cStringUsingEncoding:ENCODING];
    if (pat_c == NULL) {
        if (error != NULL)
            *error = [RegExp errorFromRegExpError:NULL pattern:value_ code:REG_BADPAT];
        return nil;
    }

    regex_t *regex = calloc(1, sizeof(regex_t));
    if (regex == NULL) {
        if (error != NULL)
            *error = [RegExp errorFromRegExpError:NULL pattern:value_ code:REG_ESPACE];
        return nil;
    }

    const int ec = regcomp(regex, pat_c, REG_EXTENDED);
    if (ec != 0) {
        if (error != NULL)
            *error = [RegExp errorFromRegExpError:regex pattern:value_ code:ec];
        // After a failed regcomp the content of the regex_t is undefined, so it must
        // not be passed to regfree; only our own allocation is released.
        free(regex);
        return nil;
    }

    return [[[RegExp alloc] initWithParsedPattern:regex] autorelease];
}


/**
 Match the compiled pattern against a string.

 @param string text to search.
 @return nil when there is no match (or the input is nil / not ASCII-encodable),
         otherwise the complete match followed by one entry per group; groups
         without a match are represented by NSNull.
 */
-(NSArray*)match:(NSString*)string
{
    if (string == nil || c_regex == NULL)
        return nil;

    const char *c_str = [string cStringUsingEncoding:ENCODING];
    if (c_str == NULL)
        return nil;

    // Slot 0 receives the complete match, slots 1..re_nsub the groups.
    const size_t count = 1 + c_regex->re_nsub;
    regmatch_t match[count];
    if (regexec(c_regex, c_str, count, match, 0) != 0)
        return nil;

    NSMutableArray *arr = [NSMutableArray arrayWithCapacity:count];
    for (size_t i = 0; i < count; i++) {
        const regoff_t so = match[i].rm_so;
        const regoff_t eo = match[i].rm_eo;
        NSString *part = nil;
        // Unused groups carry -1 offsets; treat any inconsistent range the same way.
        if (so >= 0 && eo >= so)
            part = [[[NSString alloc] initWithBytes:c_str + so
                                             length:(NSUInteger)(eo - so)
                                           encoding:ENCODING] autorelease];
        if (part != nil)
            [arr addObject:part];
        else
            [arr addObject:[NSNull null]];
    }
    return arr;
}


/** Release the compiled pattern owned by this instance. */
-(void)dealloc
{
    if (c_regex != NULL) {
        regfree(c_regex);
        free(c_regex);
        c_regex = NULL;
    }
    [super dealloc];
}

@end
and an accompanying testcase
//  RegExpTC.m
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#include "TargetConditionals.h"

#if !TARGET_IPHONE_SIMULATOR
#warning TestCase ignored when not building for Simulator
#else

#include <string.h>

#import "../Classes/RegExp.h"
#import <SenTestingKit/SenTestingKit.h>

/** Unit tests for the RegExp wrapper plus one sanity check of the raw regex.h API. */
@interface RegExpTC : SenTestCase {
}
@end

@implementation RegExpTC

/** Both YouTube URL flavours (/v/<id> and /watch?v=<id>) must yield the video id. */
-(void)testYouTube
{
    NSError *error = nil;
    // Backslashes are doubled: a single one would be consumed by the C compiler and
    // never reach regcomp ("\?" would turn the literal '?' into a quantifier).
    RegExp *pat = [RegExp RegExpWithString:@"http.+\\.youtube\\.com/(v/|watch\\?v=)([A-Za-z0-9._%-]+)" error:&error];
    STAssertNil(error, @"pattern compile");
    STAssertNotNil(pat, @"pattern compile");

    NSArray *match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1"];
    STAssertNotNil(match, @"must match");
    // STAssertEquals requires both operands to have the very same type.
    STAssertEquals((NSUInteger)3, match.count, @"one matching subexpression");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

    // /youtube.com/watch?v=([A-Za-z0-9._%-]*)[&w;=+_-]*/
    match = [pat match:@"http://www.youtube.com/watch?v=61wkfmWzLq4"];
    STAssertNotNil(match, @"must match");
    STAssertEquals((NSUInteger)3, match.count, @"one matching subexpression");
    STAssertEqualObjects(@"watch?v=", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

    match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&"];
    STAssertNotNil(match, @"must match");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
}


/** Happy path: nil handling, compile, complete match and first group. */
-(void)testCocoaOk
{
    NSError *error = nil;
    STAssertNil([RegExp RegExpWithString:nil error:&error], @"pattern nil");
    STAssertNil(error, @"pattern must compile ok");

    RegExp *reg = [RegExp RegExpWithString:@"http.+\\.youtube\\.com/v/([^&]+)&.*" error:&error];
    STAssertNil(error, @"pattern must compile ok");

    NSString *str = @"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
    NSArray *match = [reg match:str];
    STAssertNotNil(match, @"must match");
    STAssertEquals((NSUInteger)2, match.count, @"two matches expected");
    STAssertEqualObjects(str, [match objectAtIndex:0], @"complete match");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:1], @"first matching group");

    STAssertNil([reg match:nil], @"must not match");
}


/** An empty pattern must fail to compile and report a fully populated NSError. */
-(void)testCocoaFail
{
    NSError *error = nil;
    RegExp *reg = [RegExp RegExpWithString:@"" error:&error];
    STAssertNil(reg, @"pattern must fail");
    STAssertNotNil(error, @"pattern must fail");
    STAssertEqualObjects(REGEXP_NSERROR_DOMAIN, error.domain, @"domain check");
    STAssertEquals((NSInteger)REG_EMPTY, error.code, @"code check");
    STAssertEqualObjects(@"empty (sub)expression", error.localizedDescription, @"description check");
    STAssertEqualObjects(@"", [error.userInfo valueForKey:NSRegExpPatternKey], @"pattern info");
}


/** Same pattern as testCocoaOk, exercised directly through the regex.h C API. */
-(void)testRaw
{
    regex_t regex;
    const char *pattern = "http.+\\.youtube\\.com/v/([^&]+)&.*";
    STAssertTrue(0 == regcomp(&regex, pattern, REG_EXTENDED), @"RegExp compile");

    regmatch_t *match = calloc(1 + regex.re_nsub, sizeof(regmatch_t));
    const char *string = "http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
    STAssertTrue(0 == regexec(&regex, string, 1 + regex.re_nsub, match, 0), @"RegExp Match");
    STAssertTrue(1 == regex.re_nsub, @"Matching groups count");

    STAssertTrue(0 == match[0].rm_so, @"Match 0 start");
    // The trailing ".*" consumes the rest of the input, so the complete match ends at
    // the end of the string; derive the offset instead of hard-coding it.
    STAssertTrue((regoff_t)strlen(string) == match[0].rm_eo, @"Match 0 end");
    STAssertTrue(25 == match[1].rm_so, @"Match 1 start");
    STAssertTrue(36 == match[1].rm_eo, @"Match 1 end");

    free(match);
    regfree(&regex);
}

@end

#endif
Trackbacks & Pingbacks 2
[...] http://blog.mro.name/2009/09/cocoa-wrapped-regexh/ [...]
[...] 대체 구현> http://apparentlogic.com/openflow/ < 정규표현식 라이브러리> http://blog.mro.name/2009/09/cocoa-wrapped-regexh/ http://regexkit.sourceforge.net/RegexKitLite/ <라이브러리 : JSON, DOM XML, Google Data [...]
Post a Comment