]> git.ipfire.org Git - pakfire.git/blob - pakfire/packages/lexer.py
Slight modification of the makefile format.
[pakfire.git] / pakfire / packages / lexer.py
1 #!/usr/bin/python
2
3 import logging
4 import os
5 import re
6
7 from pakfire.constants import *
8
class LexerError(Exception):
    """
    Base class of all errors raised by the lexers in this module.
    """
11
12
class LexerUnhandledLine(LexerError):
    """
    Raised when a line cannot be handled by any of the available parsers.
    """
15
16
class EndOfFileError(LexerError):
    """
    Raised when a line beyond the end of the input is requested.
    """
19
20
class LexerUndefinedVariableError(LexerError):
    """
    Signals a reference to a variable that has not been defined.

    NOTE(review): not raised by the code visible in this module.
    """
23
24
# Start of a valid package name: a letter followed by one more character
# out of letters, digits, "_", "-" and "+".
# NOTE(review): there is no quantifier and no end anchor, so re.match()
# only validates the first two characters of a name -- confirm whether the
# whole name was meant to be checked before tightening this.
LEXER_VALID_PACKAGE_NAME = re.compile(r"[A-Za-z][A-Za-z0-9\_\-\+]")

# XXX need to build check
LEXER_VALID_SCRIPTLET_NAME = re.compile(r"((pre|post|posttrans)(in|un|up))")

# Comments, quotes and empty lines.
LEXER_COMMENT_CHAR = "#"
LEXER_COMMENT = re.compile(r"^\s*#")
LEXER_QUOTES = "\"'"
LEXER_EMPTY_LINE = re.compile(r"^\s*$")

# "KEY = value" and "KEY += value". The value is optional.
# Groups: (key, "+" or None, value or None).
LEXER_DEFINITION = re.compile(r"^([A-Za-z0-9_\-]+)\s*(\+)?=\s*(.+)?")

# Generic block syntax: every line of a block is indented by exactly one
# tab (which is stripped) and the block is closed by a line reading "end".
LEXER_BLOCK_LINE_INDENT = "\t"
LEXER_BLOCK_LINE = re.compile(r"^\t(.*)$")
LEXER_BLOCK_END = re.compile(r"^end$")

# Multi-line definition: "def NAME" or just "NAME" followed by a block.
LEXER_DEFINE_BEGIN = re.compile(r"^(def)?\s?([A-Za-z0-9_\-]+)$")
LEXER_DEFINE_LINE = LEXER_BLOCK_LINE
LEXER_DEFINE_END = LEXER_BLOCK_END

# "package NAME" block; NAME may contain %{...} variable references.
LEXER_PACKAGE_BEGIN = re.compile(r"^package ([A-Za-z0-9_\-\+\%\{\}]+)$")
LEXER_PACKAGE_LINE = LEXER_BLOCK_LINE
LEXER_PACKAGE_END = LEXER_BLOCK_END
LEXER_PACKAGE_INHERIT = re.compile(r"^template ([A-Z]+)$")

# "script NAME" block, or "script NAME /path/to/binary" on a single line.
LEXER_SCRIPTLET_BEGIN = re.compile(r"^script ([a-z]+)\s?(/[A-Za-z0-9\-\_/]+)?$")
LEXER_SCRIPTLET_LINE = LEXER_BLOCK_LINE
LEXER_SCRIPTLET_END = LEXER_BLOCK_END

# "template NAME" block (upper-case names only).
LEXER_TEMPLATE_BEGIN = re.compile(r"^template ([A-Z]+)$")
LEXER_TEMPLATE_LINE = LEXER_BLOCK_LINE
LEXER_TEMPLATE_END = LEXER_BLOCK_END

# "build" block.
LEXER_BUILD_BEGIN = re.compile(r"^build$")
LEXER_BUILD_LINE = LEXER_BLOCK_LINE
LEXER_BUILD_END = LEXER_BLOCK_END

# "dependencies" block.
LEXER_DEPS_BEGIN = re.compile(r"^dependencies$")
LEXER_DEPS_LINE = LEXER_BLOCK_LINE
LEXER_DEPS_END = LEXER_BLOCK_END

# "distribution" block.
LEXER_DISTRO_BEGIN = re.compile(r"^distribution$")
LEXER_DISTRO_LINE = LEXER_BLOCK_LINE
LEXER_DISTRO_END = LEXER_BLOCK_END

# "packages" block.
LEXER_PACKAGES_BEGIN = re.compile(r"^packages$")
LEXER_PACKAGES_LINE = LEXER_BLOCK_LINE
LEXER_PACKAGES_END = LEXER_BLOCK_END

# Anonymous "package" block.
LEXER_PACKAGE2_BEGIN = re.compile(r"^package$")
LEXER_PACKAGE2_LINE = LEXER_BLOCK_LINE
LEXER_PACKAGE2_END = LEXER_BLOCK_END

# Statements:
# The variable name needs the "+" quantifier; without it only names that
# are a single character long could ever be exported.
LEXER_EXPORT = re.compile(r"^export ([A-Za-z0-9_\-]+)\s*(\+)?=\s*(.+)$")
LEXER_UNEXPORT = re.compile(r"^unexport ([A-Za-z0-9_\-]+)$")
LEXER_INCLUDE = re.compile(r"^include (.+)$")

# "%{name}" variable references and "%(...)" shell expressions.
LEXER_VARIABLE = re.compile(r"\%\{([A-Za-z0-9_\-]+)\}")
LEXER_SHELL = re.compile(r"\%\(.*\)")
85
86
class Lexer(object):
    """
    Line based lexer for the makefile format.

    The lexer walks over a list of lines, hands every non-empty line to
    the first parser whose pattern matches and collects all definitions
    in a dictionary. Child classes add parsers by overriding
    get_parsers() and set themselves up in init().
    """

    def __init__(self, lines=None, parent=None, environ=None):
        """
        lines   -- list of input lines (a fresh empty list if omitted).
        parent  -- the lexer this one is nested in, or None.
        environ -- passed through to init().

        The whole input is parsed right away.
        """
        # Never share one default list between instances.
        self.lines = [] if lines is None else lines
        self.parent = parent

        # Zero-based index of the line that is processed next.
        self._lineno = 0

        # A place to store all definitions.
        self._definitions = {}

        # Init function that can be overwritten by child classes.
        self.init(environ)

        # Run the parser.
        self.run()

    def inherit(self, other):
        """
        Copy all definitions of the other lexer into this one.
        """
        self._definitions.update(other._definitions)

    @property
    def definitions(self):
        """
        The definitions that are known to this lexer.
        """
        return self._definitions

    @classmethod
    def open(cls, filename, *args, **kwargs):
        """
        Read all lines of filename and create a lexer from them.

        Additional arguments are passed on to the constructor.
        """
        # The context manager closes the file even if reading fails.
        with open(filename) as f:
            lines = f.readlines()

        return cls(lines, *args, **kwargs)

    @property
    def lineno(self):
        """
        One-based number of the current line (for messages).
        """
        return self._lineno + 1

    @property
    def root(self):
        """
        The outermost lexer, found by following the parent links.
        """
        if self.parent:
            return self.parent.root

        return self

    def get_line(self, no, raw=False):
        """
        Return line number no (zero-based) without its trailing newline.

        Unless raw is set, a line that starts with the comment character
        is returned as an empty string.

        Raises EndOfFileError if there is no such line.
        """
        try:
            line = self.lines[no]
        except IndexError:
            # self.lines is a list, so running off the end raises
            # IndexError (not KeyError).
            raise EndOfFileError

        # Strip newline.
        line = line.rstrip("\n")

        if raw:
            return line

        # Only comments that start in the first column are removed.
        # XXX fix removing of comments in lines (caution: quotations)
        if line.startswith(LEXER_COMMENT_CHAR):
            return ""

        return line

    def line_is_empty(self):
        """
        Tell whether the current line is empty or whitespace only.
        """
        line = self.get_line(self._lineno)

        return bool(re.match(LEXER_EMPTY_LINE, line))

    def expand_string(self, s):
        """
        Replace all %{name} references in s by the value of name.

        None is expanded to an empty string. Definitions that refer to
        themselves are not detected.
        """
        if s is None:
            return ""

        while s:
            m = re.search(LEXER_VARIABLE, s)
            if not m:
                break

            var = m.group(1)
            s = s.replace("%%{%s}" % var, self.get_var(var))

        return s

    def get_var(self, key, default=None):
        """
        Return the expanded value of the variable key.

        The definitions of this lexer take precedence over those of the
        root lexer. A warning is logged for an unknown variable and
        default is used for it (and for variables that are set to None).
        """
        definitions = {}
        definitions.update(self.root.definitions)
        definitions.update(self.definitions)

        val = None
        try:
            val = definitions[key]
        except KeyError:
            logging.warning("Undefined variable: %s" % key)

        if val is None:
            val = default

        return self.expand_string(val)

    def init(self, environ):
        """
        Hook for child classes; called before the input is parsed.
        """
        pass

    def get_default_parsers(self):
        """
        Return the (pattern, function) pairs every lexer knows about.
        """
        return [
            (LEXER_COMMENT, self.parse_comment),
            (LEXER_DEFINITION, self.parse_definition),
            (LEXER_DEFINE_BEGIN, self.parse_define),
            # Needs to be done.
            #(LEXER_EXPORT, self.parse_export),
            #(LEXER_UNEXPORT, self.parse_unexport),
        ]

    def get_parsers(self):
        """
        Return additional (pattern, function) pairs. Those are tried
        before the default parsers.
        """
        return []

    def parse_line(self):
        """
        Process the current line with the first matching parser.

        Raises LexerUnhandledLine if no parser matches.
        """
        # Skip empty lines.
        if self.line_is_empty():
            self._lineno += 1
            return

        line = self.get_line(self._lineno)

        parsers = self.get_parsers() + self.get_default_parsers()

        for pattern, func in parsers:
            if re.match(pattern, line):
                # The parser function fetches the line again on its own
                # and is responsible for advancing the line counter.
                func()
                return

        raise LexerUnhandledLine("%d: %s" % (self.lineno, line))

    def read_block(self, pattern_start=None, pattern_line=None, pattern_end=None,
            raw=False):
        """
        Read a whole block that starts at the current line.

        Returns a tuple (groups, lines): the groups of the match of
        pattern_start and the content of the block with the first group
        of pattern_line taken from every line. Empty lines are kept as
        empty strings. The line counter is moved behind the end line.

        Raises LexerError if the current line does not start a block or
        if a line is not indented, LexerUnhandledLine for other lines
        that cannot be handled.
        """
        assert pattern_start
        assert pattern_line
        assert pattern_end

        line = self.get_line(self._lineno)

        m = re.match(pattern_start, line)
        if not m:
            raise LexerError

        # Go in to next line.
        self._lineno += 1

        groups = m.groups()

        lines = []
        while True:
            line = self.get_line(self._lineno, raw=raw)

            if re.match(pattern_end, line):
                self._lineno += 1
                break

            m = re.match(pattern_line, line)
            if m:
                lines.append(m.group(1))
                self._lineno += 1
                continue

            if re.match(LEXER_EMPTY_LINE, line):
                lines.append("")
                self._lineno += 1
                continue

            if not line.startswith(LEXER_BLOCK_LINE_INDENT):
                raise LexerError("Line has not the right indentation: %d: %s" \
                    % (self.lineno, line))

            raise LexerUnhandledLine("%d: %s" % (self.lineno, line))

        return (groups, lines)

    def run(self):
        """
        Parse line by line until the end of the input is reached.
        """
        while self._lineno < len(self.lines):
            self.parse_line()

    def parse_comment(self):
        """
        Handle a line that matches LEXER_COMMENT.

        Comments that start in the first column have already been turned
        into empty lines by get_line() and are skipped by parse_line(),
        so any line that still has content when it gets here is rejected
        with LexerUnhandledLine.
        """
        line = self.get_line(self._lineno)

        if not line:
            return

        raise LexerUnhandledLine("%d: %s" % (self.lineno, line))

    def parse_definition(self, pattern=LEXER_DEFINITION):
        """
        Parse a "KEY = value" or "KEY += value" line.

        With "+=", the value is appended (separated by a space) to the
        previous value, which is searched in this lexer and then in the
        parent. A trailing backslash continues the value on the next
        line.

        Returns the tuple (key, value). Raises LexerError if the current
        line does not match pattern.
        """
        line = self.get_line(self._lineno)

        m = re.match(pattern, line)
        if not m:
            raise LexerError("Not a definition: %s" % line)

        # Line was correctly parsed, can go on.
        self._lineno += 1

        k, o, v = m.groups()

        if o == "+":
            prev = self.definitions.get(k, None)
            if prev is None and self.parent:
                prev = self.parent.definitions.get(k, None)
            if prev:
                # The value is optional in the pattern. Appending
                # nothing keeps the previous value instead of failing
                # when joining None.
                if v:
                    v = " ".join((prev, v))
                else:
                    v = prev

        # Handle backslash.
        while v and v.endswith("\\"):
            line = self.get_line(self._lineno)
            self._lineno += 1

            v = v[:-1] + line

        self._definitions[k] = v

        return k, v

    def parse_define(self):
        """
        Parse a multi-line definition ("def NAME" ... "end").

        The lines of the block are joined by newlines and stored as the
        value of NAME.

        Raises LexerUnhandledLine if the line is not followed by a block
        and LexerError for a line inside the block that cannot be
        handled.
        """
        line = self.get_line(self._lineno)

        m = re.match(LEXER_DEFINE_BEGIN, line)
        if not m:
            raise Exception("XXX not a define")

        # Check content of next line: the first line that is not empty
        # must either belong to the block or end it.
        offset = 1
        while True:
            line = self.get_line(self._lineno + offset)

            # Skip empty lines.
            if re.match(LEXER_EMPTY_LINE, line):
                offset += 1
                continue

            if re.match(LEXER_DEFINE_LINE, line) or re.match(LEXER_DEFINE_END, line):
                break

            # Anything else means that this is not a define at all.
            # Report the line we started at.
            line = self.get_line(self._lineno)
            raise LexerUnhandledLine("%d: %s" % (self.lineno, line))

        # Go in to next line.
        self._lineno += 1

        key = m.group(2)
        assert key

        value = []
        while True:
            line = self.get_line(self._lineno)

            if re.match(LEXER_DEFINE_END, line):
                self._lineno += 1
                break

            m = re.match(LEXER_DEFINE_LINE, line)
            if m:
                self._lineno += 1
                value.append(m.group(1))
                continue

            if re.match(LEXER_EMPTY_LINE, line):
                self._lineno += 1
                value.append("")
                continue

            raise LexerError("Unhandled line: %s" % line)

        self._definitions[key] = "\n".join(value)
398
399
class DefaultLexer(Lexer):
    """
    A lexer which only knows about simple definitions and def blocks.
    """
405
406
class TemplateLexer(DefaultLexer):
    """
    Lexer for the content of a template block. In addition to
    definitions it collects scriptlets.
    """

    def init(self, environ):
        # A place to store the scriptlets.
        self.scriptlets = {}

    @property
    def definitions(self):
        """
        The definitions of the parent overlaid with our own ones.
        """
        assert self.parent

        merged = dict(self.parent.definitions)
        merged.update(self._definitions)

        return merged

    def get_parsers(self):
        return [
            (LEXER_SCRIPTLET_BEGIN, self.parse_scriptlet),
        ]

    def parse_scriptlet(self):
        """
        Parse a "script NAME" statement.

        If a path follows the name, the scriptlet refers to that file
        and there is no block to read. Otherwise the lines up to "end"
        are stored as shell code.
        """
        header = re.match(LEXER_SCRIPTLET_BEGIN, self.get_line(self._lineno))
        if header is None:
            raise Exception("Not a scriptlet")

        self._lineno += 1

        name, path = header.group(1), header.group(2)

        # check if scriptlet was already defined.
        if name in self.scriptlets:
            raise Exception("Scriptlet %s is already defined" % name)

        if path:
            self.scriptlets[name] = {
                "lang" : "bin",
                "path" : self.expand_string(path),
            }
            return

        body = []
        while True:
            # The code is taken as is, comments must not be touched.
            current = self.get_line(self._lineno, raw=True)

            if re.match(LEXER_SCRIPTLET_END, current):
                self._lineno += 1
                break

            content = re.match(LEXER_SCRIPTLET_LINE, current)
            if content:
                body.append(content.group(1))
            elif re.match(LEXER_EMPTY_LINE, current):
                body.append("")
            else:
                raise LexerUnhandledLine("%d: %s" % (self.lineno, current))

            self._lineno += 1

        self.scriptlets[name] = {
            "lang" : "shell",
            "scriptlet" : "\n".join(body),
        }
477
478
class PackageLexer(TemplateLexer):
    """
    Lexer for the content of a package block. A package takes its
    defaults from a template, which is "MAIN" unless the block says
    otherwise.
    """

    def init(self, environ):
        TemplateLexer.init(self, environ)

        self._template = "MAIN"

        assert isinstance(self.parent, PackagesLexer)

    @property
    def definitions(self):
        """
        The definitions of the template overlaid with our own ones.
        """
        merged = {}

        template = self.template
        if template:
            merged.update(template.definitions)

        merged.update(self._definitions)

        return merged

    @property
    def template(self):
        """
        The template lexer this package inherits from, or None if no
        template name is set.

        Raises LexerError if the root lexer has no template of that name.
        """
        if self._template:
            # Get template from parent.
            try:
                return self.root.templates[self._template]
            except KeyError:
                raise LexerError("Template does not exist: %s" % self._template)

        return None

    def get_parsers(self):
        own = [
            (LEXER_PACKAGE_INHERIT, self.parse_inherit),
        ]

        return own + TemplateLexer.get_parsers(self)

    def parse_inherit(self):
        """
        Parse a "template NAME" statement, which selects the template
        of this package.
        """
        line = self.get_line(self._lineno)

        found = re.match(LEXER_PACKAGE_INHERIT, line)
        if found is None:
            raise LexerError("Not a template inheritance: %s" % line)

        self._lineno += 1
        self._template = found.group(1)

        # Check if template exists.
        assert self.template
529
530
class BuildLexer(DefaultLexer):
    """
    Lexer for the content of a build block.
    """

    @property
    def definitions(self):
        """
        Only our own definitions.
        """
        return self._definitions

    @property
    def stages(self):
        """
        Alias for the definitions.
        """
        return self.definitions

    def inherit(self, other):
        """
        Inherit everything from other lexer.
        """
        for key, value in other._definitions.items():
            self._definitions[key] = value
545
546
class RootLexer(DefaultLexer):
    """
    Lexer for the top level of a makefile.

    Knows about include statements as well as the packages and build
    blocks and keeps track of the variables that are to be exported.
    """

    def init(self, environ):
        """
        Set up the containers and import environ (a dictionary or None)
        as exported definitions. A lexer without a parent also includes
        all files listed in MACRO_FILES.
        """
        # A list of variables that should be exported in the build
        # environment.
        self.exports = []

        # A place to store all packages and templates.
        self.packages = PackagesLexer([], parent=self)

        # Import all environment variables.
        if environ:
            for k, v in environ.items():
                self._definitions[k] = v

                self.exports.append(k)

        # Place for build instructions
        self.build = BuildLexer([], parent=self)

        # Include all macros.
        if not self.parent:
            for macro in MACRO_FILES:
                self.include(macro)

    def include(self, file):
        """
        Parse file with a new RootLexer and take over everything that
        was found in it.
        """
        # Create a new lexer, and parse the whole file.
        include = RootLexer.open(file, parent=self)

        # Copy all data from the included file.
        self.inherit(include)

    def inherit(self, other):
        """
        Inherit everything from other lexer.
        """
        self._definitions.update(other._definitions)

        self.build.inherit(other.build)
        self.packages.inherit(other.packages)

        for export in other.exports:
            if export not in self.exports:
                self.exports.append(export)

    @property
    def templates(self):
        """
        The templates of the packages lexer.
        """
        return self.packages.templates

    def get_parsers(self):
        return [
            (LEXER_INCLUDE, self.parse_include),
            (LEXER_PACKAGES_BEGIN, self.parse_packages),
            (LEXER_BUILD_BEGIN, self.parse_build),
        ]

    def parse_build(self):
        """
        Parse a build block and merge its content into self.build.

        Raises LexerError if the current line is not a build statement
        or if the block contains a line that cannot be handled.
        """
        line = self.get_line(self._lineno)

        if not re.match(LEXER_BUILD_BEGIN, line):
            raise LexerError("Not a build statement: %s" % line)

        # read_block() consumes every line exactly once and raises for
        # lines it cannot handle. Reading the block by hand must not
        # count a whitespace-only block line as an empty line as well,
        # nor keep looping on a line that matches nothing.
        groups, lines = self.read_block(
            pattern_start=LEXER_BUILD_BEGIN,
            pattern_line=LEXER_BUILD_LINE,
            pattern_end=LEXER_BUILD_END,
        )

        build = BuildLexer(lines, parent=self)
        self.build.inherit(build)

    def parse_include(self):
        """
        Parse an "include FILE" statement. Variables in the filename
        are expanded before the file is included.
        """
        line = self.get_line(self._lineno)

        m = re.match(LEXER_INCLUDE, line)
        if not m:
            raise LexerError("Not an include statement: %s" % line)

        # Get the filename from the line.
        filename = self.expand_string(m.group(1))

        # Include the content of the file.
        self.include(filename)

        # Go on to next line.
        self._lineno += 1

    def parse_export(self):
        """
        Parse an "export KEY = value" statement: define the variable
        and remember that it has to be exported.
        """
        # The pattern has to be passed by keyword.
        k, v = self.parse_definition(pattern=LEXER_EXPORT)

        if k and k not in self.exports:
            self.exports.append(k)

    def parse_unexport(self):
        """
        Parse an "unexport KEY" statement. The current line is consumed
        even if it does not match.
        """
        line = self.get_line(self._lineno)
        self._lineno += 1

        m = re.match(LEXER_UNEXPORT, line)
        if m:
            k = m.group(1)
            if k and k in self.exports:
                self.exports.remove(k)

    def parse_packages(self):
        """
        Parse a packages block and merge its content into self.packages.
        """
        keys, lines = self.read_block(
            pattern_start=LEXER_PACKAGES_BEGIN,
            pattern_line=LEXER_PACKAGES_LINE,
            pattern_end=LEXER_PACKAGES_END,
            raw=True,
        )

        pkgs = PackagesLexer(lines, parent=self)
        self.packages.inherit(pkgs)
679
680
class PackagesLexer(DefaultLexer):
    """
    Lexer for the content of a packages block, which consists of
    template and package blocks.
    """

    def init(self, environ):
        # A place to store all templates.
        self.templates = {}

        # A place to store all packages.
        self.packages = []

    def inherit(self, other):
        """
        Take over all templates and packages of the other lexer.
        """
        # Copy all templates and packages but make sure
        # to update the parent lexer (for accessing each other).
        for name, template in other.templates.items():
            template.parent = self
            self.templates[name] = template

        for pkg in other.packages:
            pkg.parent = self
            self.packages.append(pkg)

    def __iter__(self):
        return iter(self.packages)

    def get_parsers(self):
        return [
            (LEXER_TEMPLATE_BEGIN, self.parse_template),
            (LEXER_PACKAGE_BEGIN, self.parse_package),
        ]

    def parse_template(self):
        """
        Parse a "template NAME" block and store it under its name.

        Raises LexerError if the block contains a line that cannot be
        handled.
        """
        line = self.get_line(self._lineno)

        if not re.match(LEXER_TEMPLATE_BEGIN, line):
            raise Exception("Not a template")

        # read_block() consumes every line exactly once and raises for
        # lines it cannot handle. Reading the block by hand must not
        # count a whitespace-only block line as an empty line as well,
        # nor keep looping on a line that matches nothing.
        groups, lines = self.read_block(
            pattern_start=LEXER_TEMPLATE_BEGIN,
            pattern_line=LEXER_TEMPLATE_LINE,
            pattern_end=LEXER_TEMPLATE_END,
        )

        name = groups[0]

        template = TemplateLexer(lines, parent=self)
        self.templates[name] = template

    def parse_package(self):
        """
        Parse a "package NAME" block and append it to the packages.

        Variables in the name are expanded and the name is passed to
        the package as the definition "_name". The block may be left
        out; in that case reading stops at the first line that is
        neither empty nor part of a block.

        Raises LexerError for an invalid name and for a block that is
        not closed.
        """
        line = self.get_line(self._lineno)

        m = re.match(LEXER_PACKAGE_BEGIN, line)
        if not m:
            raise Exception("Not a package: %s" %line)

        self._lineno += 1

        name = self.expand_string(m.group(1))

        if not re.match(LEXER_VALID_PACKAGE_NAME, name):
            raise LexerError("Invalid package name: %s" % name)

        lines = ["_name = %s" % name]

        # Set as soon as the first line of the block has been read and
        # reset when the end of the block is found.
        opened = False
        while len(self.lines) > self._lineno:
            line = self.get_line(self._lineno)

            if re.match(LEXER_PACKAGE_END, line):
                opened = False
                self._lineno += 1
                break

            m = re.match(LEXER_PACKAGE_LINE, line)
            if m:
                self._lineno += 1
                lines.append(m.group(1))
                opened = True
                continue

            # Accept empty lines.
            if re.match(LEXER_EMPTY_LINE, line):
                self._lineno += 1
                lines.append(line)
                continue

            # If there is an unhandled line in a block, we raise an error.
            if opened:
                raise Exception("XXX unhandled line in package block: %s" % line)

            # If the block was never opened, we just go on.
            break

        # Still set if the input ended before the block was closed.
        if opened:
            raise LexerError("Unclosed package block '%s'." % name)

        package = PackageLexer(lines, parent=self)
        self.packages.append(package)
800
801
class FileLexer(DefaultLexer):
    """
    Lexer for a file that consists of build, distribution, package and
    dependencies blocks. The content of each kind of block is collected
    in a lexer of its own.
    """

    def init(self, environ):
        self.build = DefaultLexer()
        self.deps = DefaultLexer()
        self.distro = DefaultLexer()
        self.package = DefaultLexer()

    def get_parsers(self):
        return [
            (LEXER_BUILD_BEGIN, self.parse_build),
            (LEXER_DISTRO_BEGIN, self.parse_distro),
            (LEXER_PACKAGE2_BEGIN, self.parse_package),
            (LEXER_DEPS_BEGIN, self.parse_deps),
        ]

    def _read_section(self, begin, content, end):
        """
        Read the block at the current line without touching comments
        and return a lexer that has parsed its content.
        """
        keys, lines = self.read_block(
            pattern_start=begin,
            pattern_line=content,
            pattern_end=end,
            raw=True,
        )

        return DefaultLexer(lines)

    def parse_build(self):
        """
        Parse a build block and merge it into self.build.
        """
        section = self._read_section(
            LEXER_BUILD_BEGIN, LEXER_BUILD_LINE, LEXER_BUILD_END)
        self.build.inherit(section)

    def parse_distro(self):
        """
        Parse a distribution block and merge it into self.distro.
        """
        section = self._read_section(
            LEXER_DISTRO_BEGIN, LEXER_DISTRO_LINE, LEXER_DISTRO_END)
        self.distro.inherit(section)

    def parse_package(self):
        """
        Parse a package block and merge it into self.package.
        """
        section = self._read_section(
            LEXER_PACKAGE2_BEGIN, LEXER_PACKAGE2_LINE, LEXER_PACKAGE2_END)
        self.package.inherit(section)

    def parse_deps(self):
        """
        Parse a dependencies block and merge it into self.deps.
        """
        section = self._read_section(
            LEXER_DEPS_BEGIN, LEXER_DEPS_LINE, LEXER_DEPS_END)
        self.deps.inherit(section)