Cocoa wrapped regex.h

Tue, 01. Sep 2009

Categories: en development Tags: Cocoa iPhone NSRegularExpression Objective C regex YouTube

Strangely enough, there’s no regular expression class in the iPhone SDK.

Update: iOS 4 brings NSRegularExpression.

My simple wrapper around the regex.h C API is not Unicode-safe (patterns and searched strings must be ASCII-encodable), but it does the job, e.g. for parsing URLs. If you need more, have a look at RegexKitLite. The wrapper has the following interface:

 1//  RegExp.h
 2//
 3//  Created by Marcus Rohrmoser on 26.08.09.
 4//  Copyright 2009 Marcus Rohrmoser. All rights reserved.
 5
 6#import <Foundation/Foundation.h>
 7#import <regex.h>
 8
 9#define REGEXP_NSERROR_DOMAIN   @"regex.h"
10#define NSRegExpPatternKey      @"NSRegExpPatternKey"
11
12/** Very simple Objective-C Wrapper around the regex.h functions.
13 */
14@interface RegExp : NSObject
15{
16    @private
17    regex_t * c_regex;
18}
19
20/** Compile a RegExp.
21 @param pattern See regex.h / regcomp for details. ASCII encodeable characters only!
22 @param error in case of trouble: REGEXP_NSERROR_DOMAIN, errocode from regcomp
23 and userinfo NSLocalizedDescriptionKey and NSRegExpPatternKey.
24 @return the regexp or nil in case of error.
25 */
26+(RegExp*)RegExpWithString:(NSString*)pattern error:(NSError**)error;
27
28/**
29 Perform a match.
30 @param string text to search
31 @return nil (no match) or the complete match plus all matching groups.
32 */
33-(NSArray*)match:(NSString*)string;
34
35@end

with implementation

 1//  RegExp.m
 2//
 3//  Created by Marcus Rohrmoser on 26.08.09.
 4//  Copyright 2009 Marcus Rohrmoser. All rights reserved.
 5
 6#import "RegExp.h"
 7
 8#define ENCODING NSASCIIStringEncoding
 9
10/* Currently uses stringWithCString - which is bashed at
11 + http://lists.apple.com/archives/cocoa-dev/2004/Nov/msg01643.html, but why?
12 + What's wrong with it?
13 + */
14@implementation RegExp
15
16#pragma mark Internal Helpers
17
18-(id)initWithParsedPattern:(regex_t*)value_
19{
20    if ( self = [super init] )
21        c_regex = value_;
22    return self;
23}
24
25+(NSError*)errorFromRegExpError:(regex_t*)regex pattern:(NSString*)pattern code:(NSInteger)code
26{
27    const size_t errbuf_size = 1000;
28    char * errbuf = calloc(errbuf_size, sizeof(errbuf[0]));
29    regerror(code, regex, errbuf, errbuf_size);
30//  regfree(regex);
31//  free(regex);
32
33    NSString *msg = [NSString stringWithCString:errbuf encoding:NSASCIIStringEncoding];
34    NSError *error = [NSError errorWithDomain:REGEXP_NSERROR_DOMAIN code:code
35        userInfo:[NSDictionary dictionaryWithObjectsAndKeys:
36            pattern,  NSRegExpPatternKey,
37            msg,  NSLocalizedDescriptionKey, nil]];
38    free(errbuf);
39    return error;
40}
41
42#pragma mark Public Interface
43
44+(RegExp*)RegExpWithString:(NSString*)value_ error:(NSError**)error
45{
46    if(value_ == nil)
47        return nil;
48    const char * pat_c = [value_ cStringUsingEncoding:ENCODING];
49    regex_t * regex = calloc(1, sizeof(regex_t));
50    const int ec = regcomp(regex, pat_c, REG_EXTENDED);
51    if(ec != 0)
52    {
53        if(error != nil)
54            *error = [RegExp errorFromRegExpError:regex pattern:value_ code:ec];
55        regfree(regex);
56        free(regex);
57        return nil;
58    }
59    return [[[RegExp alloc] initWithParsedPattern:regex] autorelease];
60}
61
62-(NSArray*)match:(NSString*)string
63{
64    if(string == nil)
65        return nil;
66    const char * c_str = [string cStringUsingEncoding:ENCODING];
67    if(c_str == NULL)
68        return nil;
69    const int count = 1 + c_regex->re_nsub;
70    regmatch_t match[count];
71    const int ec = regexec(c_regex, c_str, count, match, 0);
72    if(ec != 0)
73        return nil;
74    NSMutableArray *arr = [NSMutableArray arrayWithCapacity:count];
75    for(int i = 0; i < count; i++)
76    {
77        if(match[i].rm_so == -1 && match[i].rm_eo == -1)
78            [arr addObject:[NSNull null]];
79        else
80        {
81            const char * start = &c_str[ match[i].rm_so ];
82            const int length = match[i].rm_eo - match[i].rm_so;
83            [arr addObject:[[[NSString alloc] initWithBytes:start length:length encoding:ENCODING] autorelease]];
84        }
85    }
86    return arr;
87}
88
89-(void)dealloc
90{
91    regfree(c_regex);
92    free(c_regex);
93    [super dealloc];
94}
95@end

and an accompanying testcase

  1//  RegExpTC.m
  2//
  3//  Created by Marcus Rohrmoser on 26.08.09.
  4//  Copyright 2009 Marcus Rohrmoser. All rights reserved.
  5
  6#include "TargetConditionals.h"
  7#if !TARGET_IPHONE_SIMULATOR
  8#warning TestCase ignored when not building for Simulator
  9#else
 10
 11#import "../Classes/RegExp.h"
 12
 13#import <SenTestingKit/SenTestingKit.h>
 14
 15@interface RegExpTC : SenTestCase
 16{
 17
 18}
 19
 20@end
 21
 22@implementation RegExpTC
 23
 24-(void)testYouTube
 25{
 26    NSError *error = nil;
 27    RegExp *pat =
 28[RegExp RegExpWithString:@"http.+\.youtube\.com/(v/|watch\?v=)([A-Za-z0-9._%-]+)" error:&error];
 29    STAssertNil(error, @"pattern compile");
 30    STAssertNotNil(pat, @"pattern compile");
 31
 32    NSArray *match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de
 33&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1"];
 34    STAssertNotNil(match, @"must match");
 35    STAssertEquals(3u, match.count, @"one matching subexpression");
 36    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
 37    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
 38
 39// /youtube.com/watch?v=([A-Za-z0-9._%-]*)[&w;=+_-]*/
 40
 41    match = [pat match:@"http://www.youtube.com/watch?v=61wkfmWzLq4"];
 42    STAssertNotNil(match, @"must match");
 43    STAssertEquals(3u, match.count, @"one matching subexpression");
 44    STAssertEqualObjects(@"watch?v=", [match objectAtIndex:1], @"snippet");
 45    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
 46
 47    match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&"];
 48    STAssertNotNil(match, @"must match");
 49    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
 50    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
 51}
 52
 53-(void)testCocoaOk
 54{
 55    NSError *error = nil;
 56
 57    STAssertNil([RegExp RegExpWithString:nil error:&error], @"pattern nil");
 58    STAssertNil(error, @"pattern must compile ok");
 59
 60    RegExp *reg = [RegExp RegExpWithString:@"http.+\.youtube\.com/v/([^\&]+)\&.*"
 61error:&error];
 62    STAssertNil(error, @"pattern must compile ok");
 63
 64    NSString *str = @"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1
 65&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
 66    NSArray * match = [reg match:str];
 67    STAssertNotNil(match, @"must match");
 68
 69    STAssertEquals(2u, match.count, @"two matches expected");
 70    STAssertEqualObjects(str, [match objectAtIndex:0], @"complete match");
 71    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:1], @"first matching group");
 72
 73    STAssertNil([reg match:nil], @"must not match");
 74}
 75
 76-(void)testCocoaFail
 77{
 78    NSError *error = nil;
 79    RegExp *reg = [RegExp RegExpWithString:@"" error:&error];
 80    STAssertNil(reg, @"pattern must fail");
 81    STAssertNotNil(error, @"pattern must fail");
 82    STAssertEqualObjects(REGEXP_NSERROR_DOMAIN, error.domain, @"domain check");
 83    STAssertEquals(REG_EMPTY, error.code, @"code check");
 84    STAssertEqualObjects(@"empty (sub)expression", error.localizedDescription, @"description check");
 85    STAssertEqualObjects(@"", [error.userInfo valueForKey:NSRegExpPatternKey], @"pattern info");
 86}
 87
 88-(void)testRaw
 89{
 90    regex_t regex;
 91    const char * pattern = "http.+\.youtube\.com/v/([^\&]+)\&.*";
 92    STAssertTrue(0 == regcomp(&regex, pattern, REG_EXTENDED), @"RegExp compile");
 93
 94    regmatch_t * match = calloc(1 + regex.re_nsub, sizeof(regmatch_t));
 95    const char * string = "http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0
 96&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
 97
 98    STAssertTrue(0 == regexec(&regex, string, 1 + regex.re_nsub, match, 0), @"RegExp Match");
 99    STAssertTrue(1 == regex.re_nsub, @"Matching groups count");
100
101    STAssertTrue(0 == match[0].rm_so, @"Match 0 start");
102    STAssertTrue(127 == match[0].rm_eo, @"Match 0 start");
103
104    STAssertTrue(25 == match[1].rm_so, @"Match 1 start");
105    STAssertTrue(36 == match[1].rm_eo, @"Match 1 start");
106
107    free(match);
108    regfree(&regex);
109}
110
111@end
112#endif