From: vishalgupta97 <vishalgupta7972@gmail.com>
Date: Fri, 25 May 2018 18:38:56 +0000 (+0530)
Subject: Implemented Lexer, Parser and AST module.
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cf633fb61fd64b0b49c3d99ea9cb881ebc8cd1ac;p=thirdparty%2Fautomake.git

Implemented Lexer, Parser and AST module.

The grammar seperates the automake rule into left side and right side. It
divides the left side according to the primaries like PROGRAMS, LIBRARIES.

* Lexer.pm: Lexer module. Converts a string into tokens.
* ParserTable.pm: Contains the parsing table.
* automake.y: Contains the grammar.
* automake.png: Contains the state transition diagram generated by the
  grammar.
* Tree.pm: Contains submodules for creating tree nodes.
* parser.pl: Implements the parser.
---

diff --git a/lib/Automake/Parser/.gitignore b/lib/Automake/Parser/.gitignore
new file mode 100644
index 000000000..4253bbeb5
--- /dev/null
+++ b/lib/Automake/Parser/.gitignore
@@ -0,0 +1,3 @@
+*.gv
+*.dot
+*.tab.c
\ No newline at end of file
diff --git a/lib/Automake/Parser/Lexer.pm b/lib/Automake/Parser/Lexer.pm
new file mode 100644
index 000000000..ed8129a5d
--- /dev/null
+++ b/lib/Automake/Parser/Lexer.pm
@@ -0,0 +1,51 @@
+package Lexer;
+
+use Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(lex);
+
+# lex(string)
+# Takes as input a string of line. Divides it into tokens as specified 
+# by Regex and outputs an array of Tokens. Every Token is an array having 
+# two values: token name and its value. If its an operator, it has only 
+# one value.
+sub lex($)
+{
+	my @tokens;
+	my $rhs = 0;
+	while( $_ )
+	{
+		if( $rhs && s/^(.+)//o )
+		{
+			push @tokens, ["rhs",$1];
+			$rhs=0;
+		}
+		elsif(s/^(PROGRAMS|LIBRARIES|LTLIBRARIES|LISP|PYTHON|JAVA|SCRIPTS|DATA|HEADERS|MASN|TEXINFOS)//o)
+		{
+			push @tokens, [$1];
+		}
+		elsif(s/^([a-zA-Z0-9]+)//o)
+		{
+			push @tokens, ["value",$1];
+		}
+		elsif(s/^(=)//o)
+		{
+			push @tokens, [$1];
+			$rhs = 1;
+		}
+		elsif(s/^(:|\n|_)//o)
+		{
+			push @tokens, [$1];
+		}
+		elsif(s/^(\r| )//o)
+		{
+		}
+		else
+		{
+			die "Incorrect input $_";
+		}
+	}
+	return @tokens;
+}
+
diff --git a/lib/Automake/Parser/ParserTable.pm b/lib/Automake/Parser/ParserTable.pm
new file mode 100644
index 000000000..f918f0519
--- /dev/null
+++ b/lib/Automake/Parser/ParserTable.pm
@@ -0,0 +1,48 @@
+package ParserTable;
+
+use Exporter;
+use Tree;
+
+our @ISA = qw(Exporter);
+our @Export = qw(@table $accept);
+
+#Stores the state number where the input is accepted
+our $accept=9;
+
+# Stores the state diagram. Its an array of hashes. Each index corresponds 
+# to ith state. Every key in hash corresponds to a token, value corresponds 
+# to next state. reduce key specifies the reduction of token. Its an array 
+# consisting of number of elements to be reduced and a reference to a function 
+# to create a node.
+our @table=(
+		{value => 1, input => 2, stmts => 3, stmt => 4, lhs => 5, optionlist => 6},
+		{":" => 7, "_" => 8},
+		{end => 9},
+		{value => 1, lhs => 5, optionlist => 6, stmt => 10, reduce => [1, \&input]}, #input : stmts
+		{"\n" => 11},
+		{"=" => 12},
+		{value => 13, PROGRAMS => 14, LIBRARIES => 15, LTLIBRARIES => 16, LISP => 17, PYTHON => 18, JAVA => 19, SCRIPTS => 20, DATA => 21, HEADERS => 22, MASN => 23, TEXINFOS => 24, primaries => 25},
+		{rhs => 26},
+		{reduce => [2, \&optionlist]}, #optionlist : value '_'
+		{},
+		{"\n" => 27},
+		{reduce => [2, \&stmts]}, #stmts : stmt '\n'
+		{rhs => 28},
+		{"_" =>29, reduce => [1, \&primaries]}, #primaries : value
+		{reduce => [1, \&primaries]}, #primaries : PROGRAMS
+		{reduce => [1, \&primaries]}, #primaries : LIBRARIES
+		{reduce => [1, \&primaries]}, #primaries : LTLIBRARIES
+		{reduce => [1, \&primaries]}, #primaries : LISP
+		{reduce => [1, \&primaries]}, #primaries : PYTHON
+		{reduce => [1, \&primaries]}, #primaries : JAVA
+		{reduce => [1, \&primaries]}, #primaries : SCRIPTS
+		{reduce => [1, \&primaries]}, #primaries : DATA
+		{reduce => [1, \&primaries]}, #primaries : HEADERS
+		{reduce => [1, \&primaries]}, #primaries : MASN
+		{reduce => [1, \&primaries]}, #primaries : TEXINFOS
+		{reduce => [2, \&lhs]}, #lhs : optionlist primaries
+		{reduce => [3, \&stmt]}, #stmt : value ':' rhs
+		{reduce => [3, \&stmts]}, #stmts : stmts stmt '\n'
+		{reduce => [3, \&stmt]}, #stmt : lhs '=' rhs 
+		{reduce => [3, \&optionlist]} #optionlist : optionlist value '_'
+		); 
\ No newline at end of file
diff --git a/lib/Automake/Parser/Tree.pm b/lib/Automake/Parser/Tree.pm
new file mode 100644
index 000000000..cad08fcde
--- /dev/null
+++ b/lib/Automake/Parser/Tree.pm
@@ -0,0 +1,164 @@
+package Tree;
+
+use Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(input stmt stmts lhs primaries optionlist traverse printgraph);
+
+# Grammar Rule : (1) input => stmts
+# Create a node having child as stmts.
+sub input($)
+{
+	my ($val) = @_;
+	my %node = (name => input, childs => [$val]);
+	return \%node;
+}
+
+# Grammar Rule : (1) stmt => lhs '=' rhs
+# Create a node for Automake rule. It has lhs and rhs as childs.
+#				 (2) stmt => value ':' rhs
+# Create a node for Make rule. It has value and rhs as childs.
+sub stmt($$$)
+{
+	my ($lhs, $sym, $rhs) = @_;
+	my %node;
+	if($sym -> [0] eq '=')
+	{
+		my %rhsnode = (name => rhs, val => $rhs -> [1]);
+		%node = (name => stmt, childs => [$lhs, \%rhsnode],type => automake);
+	}
+	else
+	{
+		%node = (name => stmt, lhs => $lhs, rhs =>$rhs->[1],type => make);
+	} 
+	return \%node;
+}
+
+# Grammar Rule : (1) stmts=> stmt '\n'
+# Creates a node having a child as stmt
+#				 (2) stmts=> stmts stmt '\n'
+# Creates a node having a child as stmt. Insert the created node into 
+# the childs array of the stmts(First Argument).
+sub stmts($$;$)
+{
+	my ($val1,$val2,$val3) = @_;
+	if($val3 == undef)
+	{
+		my %node = (name => stmts, childs => [$val1]);
+		my %nodeval = (name => stmts, childs => [\%node]);
+		return \%nodeval;
+	}
+	else
+	{
+		my %node = (name => stmts,childs => [$val2]);
+		push @{$val1->{childs}},\%node;
+		return $val1;
+	}
+}
+
+# Grammar Rule : (1) lhs => optionlist primaries
+# Create a node for left hand side of variable defination consisting of 
+# option list and primary.
+sub lhs($$)
+{
+	my ($val1, $val2) = @_;
+	my %node = (name => lhs, childs => [$val1, $val2]);
+	return \%node;
+}
+
+# Grammar Rule : (1) primaries : PROGRAMS
+#				 (2) primaries : LIBRARIES
+#				 (3) primaries : LTLIBRARIES
+#				 (4) primaries : LISP
+#				 (5) primaries : PYTHON
+#				 (6) primaries : JAVA
+#				 (7) primaries : SCRIPTS
+#				 (8) primaries : DATA
+#				 (9) primaries : HEADERS
+#				 (10) primaries : MASN
+#				 (11) primaries : TEXINFOS
+#				 (12) primaries : value
+# Creates a node corresponding to the given primary.
+sub primaries($)
+{
+	my ($val) = @_;
+	my %node;
+	if( $val -> [0] eq 'value')
+	{
+		%node = ( name => primaries, val=> $val -> [1]);
+	}
+	else
+	{
+		%node = ( name => primaries, val => $val1);
+	}
+	return \%node;
+}
+
+# Grammar Rule : (1) optionlist : value '_'
+# Create a node having data value in val key.
+#				 (2) optionlist : optionlist value '_'
+# Add the data value to val key in the node pointed by optionlist(First Argument).
+sub optionlist($$;$)
+{
+	my ($val1, $val2, $val3) = @_;
+	if($val3 == undef)
+	{
+		my %node = (name => optionlist, val => [$val1 -> [1]]);
+		return \%node;
+	}
+	else
+	{
+		push @{$val1 -> {val}},$val2 -> [1];
+		return $val1;
+	}
+}
+
+# printgraph(Hash)
+# prints the AST by traversing the tree starting at node pointed by hash.
+sub printgraph($)
+{
+	my $FH;
+	open($FH, '>', 'ast.gv') or die $!;
+	print $FH "graph graphname {\n";
+	my ($ref) = @_;
+	print $FH "0 [label=\"Root\"];";
+	traverse($ref, $FH, 0, 1);
+	print $FH "}\n";
+	close $FH;
+}
+# traverse(Hash, File Handle, Parent Id, Node Id)
+# Traverses the tree recursively. Prints the information about the current 
+# node to file. Call all its child with Parent Id equal to current Node Id 
+# and Node Id equal to (Parent Id*2+i) where i is the ith Child.
+sub traverse($$$$)
+{
+	my ($ref,$FH,$parent,$id)=@_;
+	my %node = %$ref;
+	#print $level," ",$pos," ",$node{name}," ";
+	print $FH "$parent--$id;\n";
+	my $label = "";
+	@keys = sort grep {!/^childs/} keys %node;
+	foreach $key (@keys)
+	{
+		$label .= $key."=>";
+		if(ref($node{$key}) eq 'ARRAY')
+		{
+			$label .= join(" ",@{$node{$key}})."\n";
+		}
+		else
+		{
+			$label .= $node{$key}." ";
+		}
+	}
+	print $FH "$id [label=\"$label\"];";
+	if( $node{childs} )
+	{
+		my $val1 = $node{childs};
+		my $i = 1;
+		foreach $child (@$val1)
+		{
+			traverse($child,$FH,$id,2*$id+$i);
+			$i++;
+		}
+	}
+}
\ No newline at end of file
diff --git a/lib/Automake/Parser/ast.png b/lib/Automake/Parser/ast.png
new file mode 100644
index 000000000..2d81044d6
Binary files /dev/null and b/lib/Automake/Parser/ast.png differ
diff --git a/lib/Automake/Parser/automake.png b/lib/Automake/Parser/automake.png
new file mode 100644
index 000000000..c95bbb20a
Binary files /dev/null and b/lib/Automake/Parser/automake.png differ
diff --git a/lib/Automake/Parser/automake.y b/lib/Automake/Parser/automake.y
new file mode 100644
index 000000000..9d18adfad
--- /dev/null
+++ b/lib/Automake/Parser/automake.y
@@ -0,0 +1,24 @@
+%token value rhs PROGRAMS LIBRARIES LTLIBRARIES LISP PYTHON JAVA SCRIPTS DATA HEADERS MASN TEXINFOS
+%%
+
+input : stmts ;
+stmts : stmt '\n'
+		| stmts stmt '\n'
+stmt  : lhs '=' rhs  
+		| value ':' rhs 
+lhs   : optionlist primaries
+primaries : PROGRAMS 
+			| LIBRARIES
+			| LTLIBRARIES
+			| LISP
+			| PYTHON
+			| JAVA
+			| SCRIPTS
+			| DATA
+			| HEADERS
+			| MASN
+			| TEXINFOS
+			| value
+optionlist : value '_'
+			| optionlist value '_'
+%%
\ No newline at end of file
diff --git a/lib/Automake/Parser/input.txt b/lib/Automake/Parser/input.txt
new file mode 100644
index 000000000..c65f5d013
--- /dev/null
+++ b/lib/Automake/Parser/input.txt
@@ -0,0 +1,8 @@
+dist_bin_PROGRAMS = server client
+server_SOURCES = server.c db.c
+client_SOURCES = client.c dep.c
+noinst_LIBRARIES = libfoo.a
+noinst_LTLIBRARIES = foolib.a
+files_JAVA = a.java b.java
+files_PYTHON = chk.py app.py test.py
+test_SCRIPTS = t1.sh t2.sh
diff --git a/lib/Automake/Parser/parser.pl b/lib/Automake/Parser/parser.pl
new file mode 100644
index 000000000..0163224de
--- /dev/null
+++ b/lib/Automake/Parser/parser.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/perl
+use strict;
+use Lexer;
+use Tree;
+use ParserTable;
+
+my $debug = 0;
+
+open ( data, "<input.txt" );
+
+#Stores the list of tokens generated by lexer.
+my @tokens; 
+
+while ( <data> )
+{
+	push @tokens, lex($_);
+}
+if( $debug )
+{
+	print "Lexer Output\n";
+	foreach my $token ( @tokens )
+	{
+		print join(" ", @{$token}), "\n";
+	}
+}
+
+push @tokens, ["end"];
+my @stack = (0);
+print "Parser Output\n" if $debug;
+
+while ( @stack )
+{
+	if($stack[-1] == $ParserTable::accept)
+	{
+		print "Complete\n";
+		printgraph( $stack[-4] );
+		last;
+	}
+	my @curr_token = @{ $tokens[0] };	
+	if(my $val = $ParserTable::table[ $stack[-1] ]{ $curr_token[0] })
+	{
+		push @stack, \@curr_token, $val;
+		shift @tokens;
+	}
+	elsif(my $val = $ParserTable::table[ $stack[-1] ]{ reduce })
+	{
+		my @val1 = @$val;
+		my @param;
+		for(my $i = 1; $i <= 2 * $val1[0]; $i++)
+		{
+			if($i%2 == 0)
+			{
+				$val = pop @stack;
+				push @param,$val;
+			}
+			else
+			{
+				pop @stack;
+			}
+		}
+		@param = reverse @param;
+		push @stack, $val1[1]->( @param );
+		push @stack, $ParserTable::table[ $stack[-2] ]{ $stack[-1]->{ name }};
+	}
+	else
+	{
+		die "Unexpected Token ". @curr_token."\n";
+	}
+	print @stack, "\n" if $debug;
+}
\ No newline at end of file