/*
 * Jeffrey Friedl
 * Omron Corporation
 * Nagaokakyoshi, Japan 617
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Public License
 * (the "GNU Copyleft").
 */

#include "config.h"
#include "assert.h"
#include <ctype.h>
#include "jregex.h"
#include "xmalloc.h"
#include "replace.h"

/*
 * Look up the text recorded for parenthesized group IDX (0 for "\1",
 * 1 for "\2", ... 8 for "\9") in regexec_paren_info.
 *
 * Returns nonzero and stores the bounds in *START / *END when IDX is below
 * regexec_paren_info_used and both recorded pointers are non-null.
 * Returns zero (leaving *START / *END untouched) otherwise.
 *
 * Both the sizing pass and the copying pass of _apply_substitution() go
 * through this one routine so that they can never disagree about how
 * many bytes a group reference expands to.
 */
static int
paren_match_span(unsigned idx,
		 const unsigned char **start,
		 const unsigned char **end)
{
    if (idx >= regexec_paren_info_used)
	return 0;
    if (regexec_paren_info[idx].match_start == 0 ||
	regexec_paren_info[idx].match_end == 0)
	return 0;
    *start = regexec_paren_info[idx].match_start;
    *end = regexec_paren_info[idx].match_end;
    return 1;
}

/*
 * Run COMPILED against STR (STR_LEN bytes), beginning the search at
 * STARTSEARCH, and build a freshly xmalloc'd, NUL-terminated copy of STR
 * in which the matched text is replaced by the expansion of REPLACE.
 *
 * Within REPLACE:
 *   "\&"         expands to the entire matched text (only when
 *                NO_REGEXEC_MATCH_POINTS is not defined);
 *   "\1".."\9"   expand to what the corresponding parenthesized group
 *                matched, or to nothing if no text is recorded for it;
 *   "\x"         (any other character x) yields just x;
 *   a lone backslash at the very end of REPLACE is taken literally.
 *
 * On success, *NEW receives the new string, *NEW_END_MATCH points just
 * past the expanded replacement inside it, and APP_SUB_SUCCESS is
 * returned.  Otherwise neither output is written and the return value is
 * APP_SUB_PAREN_INFO_NOT_ENOUGH (regexec_paren_info_size is smaller than
 * reg_max_paren_level_used(COMPILED)) or APP_SUB_DID_NOT_MATCH (regexec()
 * returned 0).
 */
static unsigned
_apply_substitution(const regex_t *compiled,
		    unsigned char **new,
		    unsigned char **new_end_match,
		    const unsigned char *str,
		    unsigned str_len,
		    const unsigned char *startsearch,
		    const unsigned char *replace)
{
    unsigned char *nptr, *new_end;
    unsigned required_len;
    const unsigned char *ptr;
    const unsigned char *sptr, *send;

    if (regexec_paren_info_size < reg_max_paren_level_used(compiled))
	return APP_SUB_PAREN_INFO_NOT_ENOUGH;

    if (regexec(compiled, startsearch, str_len-(startsearch - str)) == 0)
	return APP_SUB_DID_NOT_MATCH;

    /*
     * Start with the space needed for what is NOT replaced: the text
     * before the match plus the text after it.  Each half is computed as
     * its own pointer difference so no out-of-range pointer is formed.
     */
    #ifndef NO_REGEXEC_MATCH_POINTS
    required_len = (regexec_match_start - str) +
		   ((str + str_len) - regexec_match_end);
    #else
    required_len = 0;
    #endif

    /*
     * Pass 1: walk the replacement text and add up how many bytes its
     * expansion will occupy.  This must mirror pass 2 below exactly.
     */
    for (ptr = replace; *ptr != '\0'; )
    {
	unsigned char c;

	if (*ptr != '\\' || ptr[1] == '\0')
	{
	    /*
	     * An ordinary character, or a backslash with nothing after
	     * it.  The latter is kept literally; consuming a "next"
	     * character there would step over the terminating NUL.
	     */
	    required_len++;
	    ptr++;
	    continue;
	}

	/* a backslash-something: 'c' is the something */
	c = ptr[1];
	ptr += 2;

	#ifndef NO_REGEXEC_MATCH_POINTS
	if (c == '&') {
	    /* the entire bit that was matched */
	    required_len += regexec_match_end - regexec_match_start;
	    continue;
	}
	#endif

	/* at this point, only \1 through \9 are special */
	if (c < '1' || c > '9') {
	    required_len++;	/* "\x" stands for plain x */
	    continue;
	}

	if (paren_match_span(c - '1', &sptr, &send))
	    required_len += send - sptr;
    }

    /* finalize length; get memory; set pointers */
    required_len += 1;	/* final null */
    nptr = *new = xmalloc(required_len);
    new_end = nptr + required_len;

    #ifndef NO_REGEXEC_MATCH_POINTS
    /* copy from start of STR up to the start of the match */
    for (sptr = str; sptr != regexec_match_start; )
	*nptr++ = *sptr++;
    #endif

    /* Pass 2: run through the replacement again, this time emitting it */
    for (ptr = replace; *ptr != '\0'; )
    {
	unsigned char c;
	assert(nptr < new_end);

	if (*ptr != '\\' || ptr[1] == '\0') {
	    /* raw character (or lone trailing backslash) */
	    *nptr++ = *ptr++;
	    continue;
	}

	c = ptr[1];
	ptr += 2;

	#ifndef NO_REGEXEC_MATCH_POINTS
	/* if \&, replace with matched text */
	if (c == '&') {
	    for (sptr = regexec_match_start; sptr < regexec_match_end; )
		*nptr++ = *sptr++;
	    continue;
	}
	#endif

	if (c < '1' || c > '9') {
	    *nptr++ = c;	/* not \1 through \9: just the character */
	    continue;
	}

	/*
	 * Same lookup (including the bounds check on the group index) as
	 * pass 1, so we never copy bytes that were not counted.
	 */
	if (paren_match_span(c - '1', &sptr, &send)) {
	    while (sptr < send)
		*nptr++ = *sptr++;
	}
    }

    /* note where in the new string the replacement ends */
    *new_end_match = nptr;

    #ifndef NO_REGEXEC_MATCH_POINTS
    /* copy over from end-of-match to the end of STR */
    for (sptr = regexec_match_end; sptr < str + str_len; )
	*nptr++ = *sptr++;
    #endif

    *nptr++ = '\0';
    assert(nptr == new_end);
    return APP_SUB_SUCCESS;
}

/*
 * Substitute REPLACE for matches of COMPILED in STR (STR_LEN bytes),
 * working left to right, each new search resuming just past the previous
 * replacement.
 *
 * The first substitution is always attempted; if it fails, its status is
 * returned, *MATCHCOUNT is set to 0 and no string is produced here.
 * After that, further substitutions are made until COUNT (pre-decremented
 * before each additional attempt) reaches zero, the previous replacement
 * ends at the terminating NUL, or an attempt fails.  Note that because
 * COUNT is unsigned, a COUNT of 0 wraps around on that first decrement
 * rather than stopping the loop.
 *
 * On success *NEW holds the final string (intermediate strings are
 * freed), *MATCHCOUNT holds the number of substitutions made, and
 * APP_SUB_SUCCESS is returned.  MATCHCOUNT may be null if the caller
 * does not care about the count.
 */
unsigned
apply_substitution(const regex_t *compiled,
		   unsigned char **new,
		   unsigned *matchcount,
		   const unsigned char *str,
		   unsigned str_len,
		   const unsigned char *replace,
		   unsigned count)
{
    unsigned char *resume_at;	/* just past the latest replacement */
    unsigned ignored_count;	/* sink when caller passes no counter */
    int status;

    if (matchcount == 0)
	matchcount = &ignored_count;

    /* first round works on the caller's string */
    status = _apply_substitution(compiled, new, &resume_at,
				 str, str_len, str, replace);
    *matchcount = 0;
    if (status != APP_SUB_SUCCESS)
	return status;
    *matchcount = 1;

    /* later rounds work on the string produced by the round before */
    for (;;)
    {
	unsigned char *previous;

	if (--count == 0)
	    break;
	if (*resume_at == '\0')
	    break;

	previous = *new;
	status = _apply_substitution(compiled, new, &resume_at,
				     previous, strlen((void*)previous),
				     resume_at, replace);
	if (status != APP_SUB_SUCCESS)
	    break;		/* *new still refers to 'previous' */

	*matchcount += 1;
	free(previous);
    }

    return APP_SUB_SUCCESS;
}

/*
 * One-shot convenience wrapper: compile PATTERN with FLAGS, substitute
 * REPLACE for up to TIMES matches in STR (STRINGLEN bytes) by way of
 * apply_substitution(), and release the compiled pattern again.
 *
 * When REGCOMP_SAVE_MATCHED_PAREN_INFO is available and the first
 * compile reports REGCOMP_NEED_SAVE_PAREN_INFO without that flag having
 * been given, the compile is retried with the flag added.
 *
 * Returns the string produced by apply_substitution(), or a null pointer
 * if the pattern did not compile or the substitution did not succeed.
 */
unsigned char *
sub(const unsigned char *str, unsigned stringlen,
    const unsigned char *pattern,
    const unsigned char *replace,
    unsigned flags,
    unsigned times)
{
    regex_t compiled;
    unsigned char *result = 0;
    unsigned outcome;
    int status;

    status = regcomp(&compiled, pattern, flags);

#ifdef REGCOMP_SAVE_MATCHED_PAREN_INFO
    /* pattern wants paren info but the caller did not ask for it: retry */
    if (status == REGCOMP_NEED_SAVE_PAREN_INFO &&
	(flags & REGCOMP_SAVE_MATCHED_PAREN_INFO) == 0)
    {
	status = regcomp(&compiled, pattern,
			 flags|REGCOMP_SAVE_MATCHED_PAREN_INFO);
    }
#endif

    if (status != REGCOMP_SUCCESS)
	return 0;

    outcome = apply_substitution(&compiled, &result, 0,
				 str, stringlen, replace, times);
    if (outcome != APP_SUB_SUCCESS)
	result = 0;

    regfree(&compiled);
    return result;
}

#ifdef TEST
/*
 * Test driver (built only when TEST is defined).
 *
 * Usage: prog [pattern [replacement [string]]]
 * Missing arguments default to "a", ">A<" and "xabxyzxaaaawz".
 * Prints the pattern, the replacement, the input string and the result.
 */
int main(int argc, char *argv[])
{
    /*
     * argv entries and string literals are plain 'char *'; the regex
     * interface works in 'unsigned char *', so convert explicitly.
     */
    const unsigned char *pattern =
	(const unsigned char *)(argc > 1 ? argv[1] : "a");
    const unsigned char *replace =
	(const unsigned char *)(argc > 2 ? argv[2] : ">A<");
    const unsigned char *str =
	(const unsigned char *)(argc > 3 ? argv[3] : "xabxyzxaaaawz");
    unsigned char *new;
    int i;

    /* provide storage for recording what parenthesized groups matched */
    regexec_paren_info_size = 10;
    regexec_paren_info = xmalloc(sizeof(*regexec_paren_info) *
				 regexec_paren_info_size);

    /*
     * NOTE(review): constant_sub is not defined in this file; presumably
     * it stores a status in 'i' and the result string in 'new' -- confirm
     * against its definition.
     */
    constant_sub(i, pattern, REGCOMP_SAVE_MATCHED_PAREN_INFO,
		 str, strlen((void*)str), replace, 1000, new);

    if (i != APP_SUB_SUCCESS) {
	die("apply returns %d\n", i);
    }
    outputf("pattern is %s\n", pattern);
    outputf("replace is %s\n", replace);
    outputf("string  is %s\n", str);
    outputf("result  is %s\n", new);
    return 0;
}
#endif
