Regular Expressions :: more examples

Trim String

 1        # strip leading and trailing whitespace from a string
 2        my $string = "\n\t Remove newlines, tabs, and spaces from both sides.    ";
 3
 4        # alias the string to $_ so both substitutions edit it in place
 5        for ($string) {
 6            s/^\s+//;    # drop the run of whitespace at the beginning
 7            s/\s+$//;    # drop the run of whitespace at the end
 8        }
 9
10        print $string;
11
12        # outputs 'Remove newlines, tabs, and spaces from both sides.'

Remove Non-digits

 1        # keep only the digits of a string
 2
 3        my $string = '  1111 2222 3333-7777 ';
 4
 5        # \D matches any non-digit; /g deletes every occurrence, not just the first
 6        s/\D//g for $string;
 7
 8        print $string;
 9
10        # outputs '1111222233337777'

Embed Comments

 1        #
 2        # How the 'x' modifier can increase readability
 3        #
 4
 5        # badly formed XML
 6        my $string = <<'XMLDOC';
 7        <?xml version="1.0" ?>
 8        <Data>
 9          <publisher>Simon & Schuster</publisher>
10          <author>John le Carr&eacute;</author>
11          <title>D&D: Subterranean Espionage</title>
12        </Data>
13        XMLDOC
14
15        # escape ampersands that are not part of a character entity
16        $string =~ s|&(;?[^&\s;]*(?=[&\s<"']))|\&amp\;$1|g;
17
18        # With the 'x' modifier, comments and whitespace are ignored
19        # so the same regex can be written thusly:
20        # (the string is already escaped by now, so this second pass changes nothing)
21        $string =~ s|
22
23          &(;?           # match '&' optionally followed by a ';'
24
25          [^&\s;]*       # ...followed by zero or more chars NOT in class
26
27          (?=[&\s<"']))  # look ahead for one char in the class without consuming it
28                         # (positive lookahead assertion); being zero-width, it adds
29                         # nothing to the text captured by the first set of parens
30                         #
31                         # the lookahead succeeds only when the ampersand is NOT the
32                         # start of an entity (an entity name runs into a semicolon)
33                         #
34                         # caution: Perl interpolates variables even inside these
35                         # comments, so never write a capture variable or the delimiter here
36          |\&amp\;$1|gx; # replace with escaped ampersand and restore character(s) that
37                         # immediately followed, but only up until the lookahead assertion
38                         # match
39
40        print $string;
41
42        # outputs:
43
44        <?xml version="1.0" ?>
45        <Data>
46          <publisher>Simon &amp; Schuster</publisher>
47          <author>John le Carr&eacute;</author>
48          <title>D&amp;D: Subterranean Espionage</title>
49        </Data>