I was looking at express.js source code, to find out how it maps named route parameters to req.params properties. 
For those who don't know, in express.js you can define routes with named parameters, make them optional, restrict them to a specific format (and more):
// Example route: :id is required, :name is optional (trailing "?"),
// and :age only matches when it satisfies the \d+ pattern.
app.get("/user/:id/:name?/:age(\\d+)", (req, res) => {
    const { id, name, age } = req.params;
    console.log("ID is", id);
    console.log("Name is", name || "not specified!");
    console.log("Age is", age);
});
I realized that the heart of this functionality is a method called pathRegexp() defined in lib/utils.js. The method definition is as follows:
function pathRegexp(path, keys, sensitive, strict) { // Compiles a route path into a RegExp and pushes one { name, optional } descriptor per ":name" token onto `keys`.
    if (path instanceof RegExp) return path; // a RegExp is returned unchanged; `keys` is left untouched
    if (Array.isArray(path)) path = '(' + path.join('|') + ')'; // an array of paths is joined into a single alternation group
    path = path
        .concat(strict ? '' : '/?') // unless `strict` is truthy, append "/?" so a trailing slash is optional
        .replace(/\/\(/g, '(?:/') // rewrite every "/(" as "(?:/" (the slash moves inside a non-capturing group)
        .replace(/(\/)?(\.)?:(\w+)(?:(\(.*?\)))?(\?)?(\*)?/g, function (_, slash, format, key, capture, optional, star) { // called once per ":name" token found in the path
            keys.push({ name: key, optional: !! optional }); // record the parameter name and whether a "?" followed it
            slash = slash || ''; // the leading "/" group is undefined when it did not match
            return ''
                + (optional ? '' : slash) // required parameter: the slash is emitted outside the group
                + '(?:'
                + (optional ? slash : '') // optional parameter: the slash goes inside the group, so it is optional together with the value
                + (format || '') + (capture || (format && '([^/.]+?)' || '([^/]+?)')) + ')' // use the custom capture if given; otherwise match up to "/" (and also up to "." when a "." preceded the ":")
                + (optional || '') // re-emit the "?" so the whole group becomes optional
                + (star ? '(/*)?' : ''); // a "*" after the token appends "(/*)?"; its "*" is rewritten by the last replace below
        })
        .replace(/([\/.])/g, '\\$1') // backslash-escape every "/" and "." in the string built so far
        .replace(/\*/g, '(.*)'); // every "*" still present becomes a "(.*)" group
    return new RegExp('^' + path + '$', sensitive ? '' : 'i'); // anchored at both ends; case-insensitive unless `sensitive` is truthy
}
The important part is the regex on line 7, /(\/)?(\.)?:(\w+)(?:(\(.*?\)))?(\?)?(\*)?/g which groups the matched portions of pathname this way:
slash    
the / symbol                                                                                                                    
format
the optional . symbol right before the : symbol (I don't know what the purpose of this one is; explanation needed)
key      
the word (ie. \w+) after the : symbol                                                                           
capture
a regex written right after the key. It must be wrapped in parentheses (e.g. (\\d+))
optional
the ? symbol after the key                                                                                            
star      
the * symbol                                                                                                                     
and the callback handler builds a regex from the groups above.
Now the question is, what is the purpose of format here?
My understanding according to the following line:
(format || '') + (capture || (format && '([^/.]+?)' || '([^/]+?)'))
and the mentioned regex is,
if you put a . symbol right before the :key (after the optional slash group) and don't specify a capture (the regex wrapped in parentheses after the key), the generated regex matches the rest of the path until it reaches a . or / symbol.
So what's the point?
I'm asking this, because:
It is for matching file extensions and such properly.
Given the path '/path/:file.:ext', consider the difference between the expressions:
// With 'format' checking
/^\/path\/(?:([^\/]+?))(?:\.([^\/\.]+?))\/?$/
// Without 'format' checking
/^\/path\/(?:([^\/]+?))(?:([^\/]+?))\/?$/
In the first case, matching the URL '/path/file.js' gives you these params:
{
    file: 'file',
    ext: 'js'
}
but without the format checking, you end up with this:
{
    file: 'f',
    ext: 'ile.js'
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With