#BSD Copyright (c) 2009, Jeff Forristal (wiretrip.net) #BSD All rights reserved. #BSD #BSD Redistribution and use in source and binary forms, with or without #BSD modification, are permitted provided that the following conditions #BSD are met: #BSD #BSD - Redistributions of source code must retain the above copyright #BSD notice, this list of conditions and the following disclaimer. #BSD #BSD - Redistributions in binary form must reproduce the above copyright #BSD notice, this list of conditions and the following disclaimer in the #BSD documentation and/or other materials provided with the distribution. #BSD #BSD THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS #BSD "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT #BSD LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS #BSD FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #BSD COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, #BSD INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, #BSD BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; #BSD LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #BSD CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT #BSD LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN #BSD ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE #BSD POSSIBILITY OF SUCH DAMAGE. =item B The goal is to parse the variable, human-readable HTML into concrete structures usable by your program. The forms functions do a good job of making these structures, but I will admit: they are not exactly simple, and thus not a cinch to work with. But then again, representing something as complex as an HTML form is not a simple thing either. I think the results are acceptable for what's trying to be done. Anyways... 
Forms are stored in perl hashes, with elements in the following format: $form{'element_name'}=@([ 'type', 'value', \@params ]) Thus every element in the hash is an array of anonymous arrays. The first array value contains the element type (which is 'select', 'textarea', 'button', or an 'input' value of the form 'input-text', 'input-hidden', 'input-radio', etc). The second value is the value, if applicable (it could be undef if no value was specified). Note that select elements will always have an undef value--the actual values are in the subsequent option elements. The third value, if defined, is an anonymous array of additional tag parameters found in the element (like 'onchange="blah"', 'size="20"', 'maxlength="40"', 'selected', etc). The hash does contain one special element, which is stored under a NULL character ("\0") key. This element is of the format: $form{"\0"}=['name', 'method', 'action', \@parameters]; The element is an anonymous array that contains strings of the form's name, method, and action (values can be undef), and a reference to a @parameters array similar to that found in normal elements (above). Accessing individual values stored in the form hash becomes a test of your perl referencing skills. Hint: to access the 'value' of the third element named 'choices', you would need to do: $form{'choices'}->[2]->[1]; The '[2]' is the third element (array indexing starts at 0), and the actual value is '[1]' (the type is '[0]', and the parameter array is '[2]'). =cut ################################################################ # Cluster global variables %_forms_ELEMENTS = ( 'form' => 1, 'input' => 1, 'textarea' => 1, 'button' => 1, 'select' => 1, 'option' => 1, '/select' => 1 ); ################################################################ =item B<forms_read> Params: \$html_data Return: \@found_forms This function parses the given $html_data into libwhisker form hashes. It returns a reference to an array of hash references to the found forms. 
=cut

# forms_read(\$html_data)
#
# Parses the HTML held in the referenced scalar and returns a reference
# to an array of form hashes.  Returns undef when the argument is not a
# reference or the referenced string is empty.
sub forms_read {
    my ($html_ref) = @_;

    # Guard clauses: nothing to parse without a reference to non-empty data.
    return undef unless ref($html_ref);
    return undef if length( ${$html_ref} ) == 0;

    # Two-slot state shared with the tag callback: slot 0 is a hash,
    # slot 1 is the array that is ultimately returned.
    # NOTE(review): presumably _forms_parse_callback fills slot 0 with the
    # form being parsed and moves finished forms into slot 1 -- confirm
    # against the callback, which is defined elsewhere.
    my $state = [ {}, [] ];

    # NOTE(review): the meaning of the literal 0 argument is defined by
    # html_find_tags, which is not visible here -- confirm before changing.
    html_find_tags( $html_ref, \&_forms_parse_callback, 0, $state,
        \%_forms_ELEMENTS );

    # Read both slots only after parsing has finished; a non-empty hash
    # left in slot 0 is appended to the result list.
    my ( $pending, $found ) = @{$state};
    push @{$found}, $pending if scalar %{$pending};

    return $found;
}

################################################################ =item B Params: \%form_hash Return: $html_of_form [undef on error] This function will take the given %form hash and compose a generic HTML representation of it, formatted with tabs and newlines in order to make it neat and tidy for printing. Note: this function does *not* escape any special characters that were embedded in the element values. =cut sub forms_write { my $hr = shift; return undef if ( !ref($hr) || !( scalar %$hr ) ); return undef if ( !defined $$hr{"\0"} ); my $t = '
[0] . '" method="'; $t .= $$hr{"\0"}->[1] . '" action="' . $$hr{"\0"}->[2] . '"'; if ( defined $$hr{"\0"}->[3] ) { $t .= ' ' . join( ' ', @{ $$hr{"\0"}->[3] } ); } $t .= ">\n"; my ( $name, $ar ); while ( ( $name, $ar ) = each(%$hr) ) { next if ( $name eq "\0" ); next if ( $name eq '' && $ar->[0]->[0] eq '' ); foreach $a (@$ar) { my $P = ''; $P = ' ' . join( ' ', @{ $$a[2] } ) if ( defined $$a[2] ); $t .= "\t"; if ( $$a[0] eq 'textarea' ) { $t .= "\n"; } elsif ( $$a[0] =~ m/^input-(.+)$/ ) { $t .= "\n"; } elsif ( $$a[0] eq 'option' ) { $t .= "\t