Cocoa wrapped regex.h

Tue, 01. Sep 2009

Categories: en development Tags: Cocoa iPhone NSRegularExpression Objective C regex YouTube

Strangely enough, there’s no regular expression class in the iPhone SDK.

Update: iOS 4 brings NSRegularExpression.

My simple wrapper around the regex.h C API is not Unicode-safe (patterns and input are converted to ASCII), but it does the job, e.g. for parsing URLs. If you need more, have a look at RegexKitLite. The wrapper has this interface:

//  RegExp.h
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#import <Foundation/Foundation.h>
#import <regex.h>

#define REGEXP_NSERROR_DOMAIN   @"regex.h"
#define NSRegExpPatternKey      @"NSRegExpPatternKey"

/**
 Minimal Objective-C facade over the POSIX regex.h functions.
 Patterns and subject strings are converted to ASCII C strings before they
 are handed to regcomp / regexec.
 */
@interface RegExp : NSObject
{
@private
    regex_t *c_regex;   // compiled pattern; released in -dealloc
}

/**
 Compile a pattern using the extended regular expression syntax (REG_EXTENDED).
 @param pattern the expression, see regex.h / regcomp for the syntax.
 ASCII-encodable characters only!
 @param error set when compilation fails: domain REGEXP_NSERROR_DOMAIN, code is
 the error code returned by regcomp, userInfo carries NSLocalizedDescriptionKey
 and NSRegExpPatternKey. May be NULL. Left untouched when pattern is nil.
 @return an autoreleased RegExp, or nil when pattern is nil or does not compile.
 */
+ (RegExp *)RegExpWithString:(NSString *)pattern error:(NSError **)error;

/**
 Run the compiled pattern against a string.
 @param string the text to search.
 @return nil when there is no match (or string is nil or not ASCII-encodable);
 otherwise an array holding the complete match at index 0 followed by one entry
 per group. Groups that did not take part in the match are represented by NSNull.
 */
- (NSArray *)match:(NSString *)string;

@end

with implementation

//  RegExp.m
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#import "RegExp.h"

#define ENCODING NSASCIIStringEncoding

/* Currently uses stringWithCString - which is bashed at
 * http://lists.apple.com/archives/cocoa-dev/2004/Nov/msg01643.html, but why?
 * What's wrong with it?
 */
@implementation RegExp

#pragma mark Internal Helpers

/**
 Wrap an already compiled pattern.
 @param value_ heap-allocated regex_t that was successfully passed through
 regcomp. The receiver takes ownership and releases it in -dealloc.
 @return the initialised instance or nil.
 */
-(id)initWithParsedPattern:(regex_t*)value_
{
    self = [super init];
    if( self != nil )
        c_regex = value_;
    else if( value_ != NULL )
    {
        // ownership was handed over, so don't leak it when init fails
        regfree(value_);
        free(value_);
    }
    return self;
}

/**
 Build an NSError describing a regcomp / regexec failure.
 @param regex the regex_t the failing call operated on, may be NULL.
 @param pattern the pattern source, stored under NSRegExpPatternKey (skipped when nil).
 @param code the error code as returned by regcomp / regexec.
 @return an autoreleased NSError in REGEXP_NSERROR_DOMAIN; the regerror text is
 stored under NSLocalizedDescriptionKey when it could be obtained.
 */
+(NSError*)errorFromRegExpError:(regex_t*)regex pattern:(NSString*)pattern code:(NSInteger)code
{
    NSString *msg = nil;
    // with a zero sized buffer regerror only reports the size it needs (incl. NUL)
    const size_t errbuf_size = regerror((int)code, regex, NULL, 0);
    char * errbuf = errbuf_size > 0 ? calloc(errbuf_size, sizeof(errbuf[0])) : NULL;
    if( errbuf != NULL )
    {
        regerror((int)code, regex, errbuf, errbuf_size);
        msg = [NSString stringWithCString:errbuf encoding:ENCODING];
        free(errbuf);
    }

    // Fill the dictionary entry by entry: a nil value must not swallow the
    // entries that follow it, as it would in a nil-terminated argument list.
    NSMutableDictionary *info = [NSMutableDictionary dictionaryWithCapacity:2];
    if( pattern != nil )
        [info setObject:pattern forKey:NSRegExpPatternKey];
    if( msg != nil )
        [info setObject:msg forKey:NSLocalizedDescriptionKey];
    return [NSError errorWithDomain:REGEXP_NSERROR_DOMAIN code:code userInfo:info];
}

#pragma mark Public Interface

/**
 Compile a pattern using the extended syntax (REG_EXTENDED).
 @param value_ the pattern; must be convertible to ENCODING.
 @param error optional out parameter, only written on failure. A pattern that
 cannot be converted to ENCODING is reported as REG_BADPAT, an allocation
 failure as REG_ESPACE, everything else with the code returned by regcomp.
 @return an autoreleased RegExp, or nil when value_ is nil (error untouched)
 or compilation failed.
 */
+(RegExp*)RegExpWithString:(NSString*)value_ error:(NSError**)error
{
    if(value_ == nil)
        return nil;

    const char * pat_c = [value_ cStringUsingEncoding:ENCODING];
    if(pat_c == NULL)
    {
        // not representable in ENCODING - regcomp must never see a NULL pattern
        if(error != NULL)
            *error = [RegExp errorFromRegExpError:NULL pattern:value_ code:REG_BADPAT];
        return nil;
    }

    regex_t * regex = calloc(1, sizeof(regex_t));
    if(regex == NULL)
    {
        if(error != NULL)
            *error = [RegExp errorFromRegExpError:NULL pattern:value_ code:REG_ESPACE];
        return nil;
    }

    const int ec = regcomp(regex, pat_c, REG_EXTENDED);
    if(ec != 0)
    {
        if(error != NULL)
            *error = [RegExp errorFromRegExpError:regex pattern:value_ code:ec];
        // After a failed regcomp the content of the regex_t is undefined, so it
        // must not be passed to regfree; only our own allocation is released.
        free(regex);
        return nil;
    }
    return [[[RegExp alloc] initWithParsedPattern:regex] autorelease];
}

/**
 Perform a match.
 @param string text to search; must be convertible to ENCODING.
 @return nil when string is nil, cannot be converted, the receiver holds no
 compiled pattern, or nothing matched. Otherwise the complete match at index 0
 followed by one entry per group; groups that did not take part in the match
 are represented by NSNull.
 */
-(NSArray*)match:(NSString*)string
{
    if(string == nil || c_regex == NULL)
        return nil;
    const char * c_str = [string cStringUsingEncoding:ENCODING];
    if(c_str == NULL)
        return nil;

    // whole match plus one slot per parenthesised subexpression
    const size_t count = 1 + c_regex->re_nsub;
    regmatch_t match[count];
    if(regexec(c_regex, c_str, count, match, 0) != 0)
        return nil;

    NSMutableArray *arr = [NSMutableArray arrayWithCapacity:count];
    for(size_t i = 0; i < count; i++)
    {
        const regoff_t from = match[i].rm_so;
        const regoff_t to = match[i].rm_eo;
        if(from < 0 || to < from)
        {
            // regexec marks unused groups with -1 offsets
            [arr addObject:[NSNull null]];
            continue;
        }
        const NSUInteger length = (NSUInteger)(to - from);
        [arr addObject:[[[NSString alloc] initWithBytes:&c_str[from] length:length encoding:ENCODING] autorelease]];
    }
    return arr;
}

/** Release the compiled pattern (if any) together with its heap storage. */
-(void)dealloc
{
    if(c_regex != NULL)
    {
        regfree(c_regex);
        free(c_regex);
        c_regex = NULL;
    }
    [super dealloc];
}
@end

and an accompanying testcase

//  RegExpTC.m
//
//  Created by Marcus Rohrmoser on 26.08.09.
//  Copyright 2009 Marcus Rohrmoser. All rights reserved.

#include "TargetConditionals.h"
#if !TARGET_IPHONE_SIMULATOR
#warning TestCase ignored when not building for Simulator
#else

#import "../Classes/RegExp.h"

#import <SenTestingKit/SenTestingKit.h>

/** SenTestingKit test case exercising RegExp and the raw regex.h calls it wraps. */
@interface RegExpTC : SenTestCase
@end

@implementation RegExpTC

/** Both YouTube URL flavours must yield the flavour (group 1) and the video id (group 2). */
-(void)testYouTube
{
    NSError *error = nil;
    // Backslashes are doubled so the regex engine - not the C compiler - sees "\." and "\?".
    RegExp *pat = [RegExp RegExpWithString:@"http.+\\.youtube\\.com/(v/|watch\\?v=)([A-Za-z0-9._%-]+)"
                                     error:&error];
    STAssertNil(error, @"pattern compile");
    STAssertNotNil(pat, @"pattern compile");

    NSArray *match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de"
                                @"&fs=1&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1"];
    STAssertNotNil(match, @"must match");
    // STAssertEquals insists on identical types, hence the NSUInteger cast
    STAssertEquals((NSUInteger)3, match.count, @"complete match plus two groups");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

// /youtube.com/watch?v=([A-Za-z0-9._%-]*)[&w;=+_-]*/

    match = [pat match:@"http://www.youtube.com/watch?v=61wkfmWzLq4"];
    STAssertNotNil(match, @"must match");
    STAssertEquals((NSUInteger)3, match.count, @"complete match plus two groups");
    STAssertEqualObjects(@"watch?v=", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");

    match = [pat match:@"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&"];
    STAssertNotNil(match, @"must match");
    STAssertEqualObjects(@"v/", [match objectAtIndex:1], @"snippet");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:2], @"snippet");
}

/** Happy path: nil pattern, a compiling pattern, a full match and a nil subject. */
-(void)testCocoaOk
{
    NSError *error = nil;

    STAssertNil([RegExp RegExpWithString:nil error:&error], @"pattern nil");
    STAssertNil(error, @"pattern must compile ok");

    // '&' is an ordinary character in extended regular expressions, no escaping needed
    RegExp *reg = [RegExp RegExpWithString:@"http.+\\.youtube\\.com/v/([^&]+)&.*"
                                     error:&error];
    STAssertNil(error, @"pattern must compile ok");

    NSString *str = @"http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1"
                    @"&rel=0&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";
    NSArray * match = [reg match:str];
    STAssertNotNil(match, @"must match");

    STAssertEquals((NSUInteger)2, match.count, @"two matches expected");
    STAssertEqualObjects(str, [match objectAtIndex:0], @"complete match");
    STAssertEqualObjects(@"61wkfmWzLq4", [match objectAtIndex:1], @"first matching group");

    STAssertNil([reg match:nil], @"must not match");
}

/** An empty pattern must be rejected and reported through NSError. */
-(void)testCocoaFail
{
    NSError *error = nil;
    RegExp *reg = [RegExp RegExpWithString:@"" error:&error];
    STAssertNil(reg, @"pattern must fail");
    STAssertNotNil(error, @"pattern must fail");
    STAssertEqualObjects(REGEXP_NSERROR_DOMAIN, error.domain, @"domain check");
    STAssertEquals((NSInteger)REG_EMPTY, error.code, @"code check");
    STAssertEqualObjects(@"empty (sub)expression", error.localizedDescription, @"description check");
    STAssertEqualObjects(@"", [error.userInfo valueForKey:NSRegExpPatternKey], @"pattern info");
}

/** Sanity check of the plain regex.h API the wrapper is built on. */
-(void)testRaw
{
    regex_t regex;
    const char * pattern = "http.+\\.youtube\\.com/v/([^&]+)&.*";
    STAssertTrue(0 == regcomp(&regex, pattern, REG_EXTENDED), @"RegExp compile");

    regmatch_t * match = calloc(1 + regex.re_nsub, sizeof(regmatch_t));
    // an array (not a pointer) so that sizeof yields the subject length below
    static const char string[] = "http://www.youtube.com/v/61wkfmWzLq4&hl=de&fs=1&rel=0"
                                 "&color1=0x3a3a3a&color2=0x999999&hd=1&border=1";

    STAssertTrue(0 == regexec(&regex, string, 1 + regex.re_nsub, match, 0), @"RegExp Match");
    STAssertTrue(1 == regex.re_nsub, @"Matching groups count");

    STAssertTrue(0 == match[0].rm_so, @"Match 0 start");
    // the pattern ends in ".*", so the complete match runs to the end of the subject
    STAssertTrue((regoff_t)(sizeof(string) - 1) == match[0].rm_eo, @"Match 0 end");

    STAssertTrue(25 == match[1].rm_so, @"Match 1 start");
    STAssertTrue(36 == match[1].rm_eo, @"Match 1 end");

    free(match);
    regfree(&regex);
}

@end
#endif