Class RDoc::C_Parser
In: parsers/parse_c.rb
Parent: Object

See rdoc/c_parse.rb

Methods

Public Class methods

Prepare to parse a C file.

[Source]

     # File parsers/parse_c.rb, line 175
# Prepare to parse a C file.
#
# top_level:: container that scanned classes and modules are added to
# file_name:: path of the C file; only its directory is remembered
# body::      text of the C file
# options::   run options; +quiet+ is read here to decide whether a
#             progress stream is set up
# stats::     counters object that is incremented while scanning
def initialize(top_level, file_name, body, options, stats)
  @known_classes = KNOWN_CLASSES.dup

  # Drop the HAVE_PROTOTYPES alternatives first, then expand tabs.
  without_ifdefs = handle_ifdefs_in(body)
  @body = handle_tab_width(without_ifdefs)

  @options   = options
  @stats     = stats
  @top_level = top_level
  @classes   = {}
  @file_dir  = File.dirname(file_name)

  # Progress output goes to $stderr; no stream is kept in quiet mode.
  @progress  = options.quiet ? nil : $stderr
end

Public Instance methods

[Source]

     # File parsers/parse_c.rb, line 412
# Scan the C body for calls of the form
#
#     rb_define_alias(rb_cFoo, "new_name", "old_name");
#
# and record each one as an Alias on the class object found for the
# C variable. Every alias is counted as a method in @stats.
def do_aliases
  alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

  @body.scan(alias_call) do |c_var, alias_name, target_name|
    @stats.num_methods += 1

    # Fall back to the raw C variable name when the class is not known.
    owner = find_class(c_var, @known_classes[c_var] || c_var)
    owner.add_alias(Alias.new("", target_name, alias_name, ""))
  end
end

[Source]

     # File parsers/parse_c.rb, line 275
# Scan the C body for class and module definitions and hand each one to
# handle_class_module. The patterns are tried in this order:
# rb_define_module, rb_define_class, boot_defclass,
# rb_define_module_under, rb_define_class_under.
def do_classes
  top_level_module = /(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx
  @body.scan(top_level_module) do |c_var, module_name|
    handle_class_module(c_var, "module", module_name, nil, nil)
  end

  # The '.' lets us handle SWIG-generated files
  top_level_class = /([\w\.]+)\s* = \s*rb_define_class\s*
            \(
               \s*"(\w+)",
               \s*(\w+)\s*
            \)/mx
  @body.scan(top_level_class) do |c_var, klass_name, superclass|
    handle_class_module(c_var, "class", klass_name, superclass, nil)
  end

  boot_class = /(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/
  @body.scan(boot_class) do |c_var, klass_name, superclass|
    # A literal 0 is passed when the class has no parent.
    superclass = nil if superclass == "0"
    handle_class_module(c_var, "class", klass_name, superclass, nil)
  end

  nested_module = /(\w+)\s* = \s*rb_define_module_under\s*
            \(
               \s*(\w+),
               \s*"(\w+)"
            \s*\)/mx
  @body.scan(nested_module) do |c_var, outer_var, module_name|
    handle_class_module(c_var, "module", module_name, nil, outer_var)
  end

  nested_class = /([\w\.]+)\s* = \s*rb_define_class_under\s*
            \(
               \s*(\w+),
               \s*"(\w+)",
               \s*(\w+)\s*
            \s*\)/mx
  @body.scan(nested_class) do |c_var, outer_var, klass_name, superclass|
    handle_class_module(c_var, "class", klass_name, superclass, outer_var)
  end
end

[Source]

     # File parsers/parse_c.rb, line 323
# Scan the C body for constant and variable definitions of the form
#
#     rb_define_const(rb_cFoo, "NAME", value);
#     rb_define_global_const("NAME", value);
#
# (also rb_define_variable and rb_define_readonly_variable) and pass
# each one to handle_constants.
#
# Definitions without a receiver, or made on rb_mKernel, are attributed
# to rb_cObject.
def do_constants
  # The alternation lists exactly the four supported definers. It must
  # not end with a dangling '|': that adds an empty alternative, which
  # accepts a bare "rb_define_(" with an empty type.
  @body.scan(%r{\Wrb_define_
                 (
                    variable |
                    readonly_variable |
                    const |
                    global_const
                  )
             \s*\(
               (?:\s*(\w+),)?
               \s*"(\w+)",
               \s*(.*?)\s*\)\s*;
               }xm) do |type, var_name, const_name, definition|
    var_name = "rb_cObject" if !var_name || var_name == "rb_mKernel"
    handle_constants(type, var_name, const_name, definition)
  end
end

Look for includes of the form

    rb_include_module(rb_cArray, rb_mEnumerable);

[Source]

     # File parsers/parse_c.rb, line 638
# Look for includes of the form
#
#     rb_include_module(rb_cArray, rb_mEnumerable);
#
# and add an Include to the receiving class. Calls whose receiver is not
# in @classes are ignored.
def do_includes
  @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |target_var, module_var|
    target = @classes[target_var]
    next unless target

    # Use the Ruby-level name when the module is known, else the C name.
    module_name = @known_classes[module_var] || module_var
    target.add_include(Include.new(module_name, ""))
  end
end

[Source]

     # File parsers/parse_c.rb, line 345
# Scan the C body for method-like definitions and dispatch them:
#
# * rb_define_{singleton_method,method,module_function,private_method}
#   go to handle_method
# * rb_define_attr goes to handle_attr
# * rb_define_global_function is handled as a method on rb_mKernel
# * define_filetest_function is handled twice: as a method on
#   rb_mFileTest and as a singleton method on rb_cFile
#
# A trailing "/* in file.c */" (or "// in file.c") comment after a
# definition names the file that holds the C implementation.
def do_methods
  # Ignore top-object and weird struct.c dynamic stuff
  # (it'd be nice to handle argf)
  skipped_receivers = ["ruby_top_self", "nstr", "envtbl", "argf"]

  @body.scan(%r{rb_define_
                 (
                    singleton_method |
                    method           |
                    module_function  |
                    private_method
                 )
                 \s*\(\s*([\w\.]+),
                   \s*"([^"]+)",
                   \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                   \s*(-?\w+)\s*\)
                 (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
               }xm) do |kind, receiver, ruby_name, c_function, arity, origin_file|
    next if skipped_receivers.include?(receiver)

    receiver = "rb_cObject" if receiver == "rb_mKernel"
    handle_method(kind, receiver, ruby_name, c_function, arity, origin_file)
  end

  @body.scan(%r{rb_define_attr\(
                           \s*([\w\.]+),
                           \s*"([^"]+)",
                           \s*(\d+),
                           \s*(\d+)\s*\);
              }xm) do |receiver, attr_name, read_flag, write_flag|
    # The two numeric arguments are flags; non-zero means enabled.
    handle_attr(receiver, attr_name, read_flag.to_i != 0, write_flag.to_i != 0)
  end

  @body.scan(%r{rb_define_global_function\s*\(
                           \s*"([^"]+)",
                           \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                           \s*(-?\w+)\s*\)
              (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
              }xm) do |ruby_name, c_function, arity, origin_file|
    handle_method("method", "rb_mKernel", ruby_name, c_function, arity, origin_file)
  end

  @body.scan(/define_filetest_function\s*\(
                           \s*"([^"]+)",
                           \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                           \s*(-?\w+)\s*\)/xm) do |ruby_name, c_function, arity|
    handle_method("method", "rb_mFileTest", ruby_name, c_function, arity)
    handle_method("singleton_method", "rb_cFile", ruby_name, c_function, arity)
  end
end

[Source]

     # File parsers/parse_c.rb, line 489
# Find the comment that documents the attribute +attr_name+.
#
# Two places are searched, in order:
#
# 1. a C comment immediately preceding the rb_define_attr call for the
#    attribute
# 2. a "Document-attr: name" block; the text after that line up to the
#    closing */ is returned
#
# Returns the comment text, or an empty string when neither is found.
def find_attr_comment(attr_name)
  # The name is matched literally. Without escaping, a regexp
  # metacharacter in the name (for example '.') would match other
  # attributes. Regexp.escape also protects whitespace and '#', which
  # matter in the extended-mode pattern below.
  name = Regexp.escape(attr_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 rb_define_attr\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-attr:\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end

Find the C code corresponding to a Ruby method

[Source]

     # File parsers/parse_c.rb, line 548
# Find the C code corresponding to a Ruby method.
#
# meth_name:: name of the C function, or of a #define that aliases it
# meth_obj::  method object that receives the source text, the comment
#             and any modifiers found in the comment
# body::      C source text to search
# quiet::     when true, no "No definition" warning is printed
#
# Three shapes are recognised: a commented VALUE function definition, a
# commented #define that aliases another function, and an uncommented
# #define alias. If none is present, a 'Document-method' override
# comment alone is accepted. Returns false when nothing usable was
# found and true otherwise.
def find_body(meth_name, meth_obj, body, quiet = false)
  case body
  when %r{((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
          \s*(\(.*?\)).*?^}xm
    doc = $1
    source_text = $&

    # see if we can find the whole body
    whole_function = Regexp.new(Regexp.escape(source_text) + '[^(]*^\{.*?^\}',
                                Regexp::MULTILINE)
    source_text = $& if whole_function.match(body)

    # The comment block may have been overridden with a
    # 'Document-method' block. This happens in the interpreter
    # when multiple methods are vectored through to the same
    # C method but those methods are logically distinct (for
    # example Kernel.hash and Kernel.object_id share the same
    # implementation).
    doc = find_override_comment(meth_obj.name) || doc

    find_modifiers(doc, meth_obj) if doc

    meth_obj.start_collecting_tokens
    meth_obj.add_token(RubyToken::Token.new(1,1).set_text(source_text))
    meth_obj.comment = mangle_comment(doc)
  when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    doc, aliased_function = $1, $2

    # Document the aliased function first, then put this comment in
    # front of whatever comment that produced.
    find_body(aliased_function, meth_obj, body, true)
    find_modifiers(doc, meth_obj)
    meth_obj.comment = mangle_comment(doc) + meth_obj.comment
  when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    unless find_body($1, meth_obj, body, true)
      warn "No definition for #{meth_name}" unless quiet
      return false
    end
  else
    # No body, but might still have an override comment
    doc = find_override_comment(meth_obj.name)
    unless doc
      warn "No definition for #{meth_name}" unless quiet
      return false
    end

    find_modifiers(doc, meth_obj)
    meth_obj.comment = mangle_comment(doc)
  end

  true
end

[Source]

     # File parsers/parse_c.rb, line 658
# Return the class or module object registered for the C variable
# +raw_name+, creating it at the top level on first use.
#
# Variables whose name starts with "rb_m" are created as modules;
# everything else is created as a class with no parent.
def find_class(raw_name, name)
  @classes[raw_name] ||=
    if raw_name =~ /^rb_m/
      @top_level.add_module(NormalModule, name)
    else
      @top_level.add_class(NormalClass, name, nil)
    end
end

[Source]

     # File parsers/parse_c.rb, line 262
# Find the comment that documents +class_name+ and store it, after
# mangle_comment, on +class_meth+.
#
# Two places are searched, in order:
#
# 1. a C comment immediately preceding the Init_<class_name> function;
#    its parameter list may be "()", "(void)" or "_((void))"
# 2. a "Document-class: name" or "Document-module: name" block; the text
#    after that line up to the closing */ is used
#
# Nothing is assigned when neither is found.
def find_class_comment(class_name, class_meth)
  # Match the name literally, as find_override_comment does.
  name = Regexp.escape(class_name)
  comment = nil

  # "void" inside the parentheses is optional, so that both
  # Init_Foo(void) and Init_Foo() are recognised.
  if @body =~ %r{((?>/\*.*?\*/\s+))
                 (static\s+)?void\s+Init_#{name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi
    comment = $1
  elsif @body =~ %r{Document-(class|module):\s#{name}\s*?\n((?>.*?\*/))}m
    comment = $2
  end

  class_meth.comment = mangle_comment(comment) if comment
end

[Source]

     # File parsers/parse_c.rb, line 446
# Find the comment that documents the constant +const_name+, which was
# defined through rb_define_<type>.
#
# A C comment immediately preceding the defining call is preferred;
# otherwise the text following a "Document-const:", "Document-global:"
# or "Document-variable:" line (up to the closing */) is used.
#
# Returns the comment text, or an empty string when neither is found.
def find_const_comment(type, const_name)
  found = %r{((?>/\*.*?\*/\s+))
             rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi.match(@body) ||
          %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m.match(@body)

  # In both patterns the first capture group holds the comment text.
  found ? found[1] : ''
end

If the comment block contains a section that looks like

   call-seq:
       Array.new
       Array.new(10)

use it for the parameters

[Source]

     # File parsers/parse_c.rb, line 612
# Extract documentation modifiers from +comment+ (which is edited in
# place) and apply them to +meth_obj+.
#
# * A :nodoc: marker turns off documentation for the method.
# * A section that looks like
#
#       call-seq:
#           Array.new
#           Array.new(10)
#
#   is removed from the comment and stored, without its leading
#   asterisks, as the method's call sequence.
def find_modifiers(comment, meth_obj)
  nodoc_found = comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
                comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
  meth_obj.document_self = false if nodoc_found

  call_seq_found = comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
                   comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
  return unless call_seq_found

  # $1 belongs to whichever of the two substitutions just succeeded.
  sequence = $1
  sequence.gsub!(/^\s*\*\s*/, '')
  meth_obj.call_seq = sequence
end

[Source]

     # File parsers/parse_c.rb, line 627
# Look for a "Document-method: name" block for +meth_name+ in the C
# body. The name is matched literally.
#
# Returns the text following that line, up to and including the closing
# */, or nil when there is no such block.
def find_override_comment(meth_name)
  escaped = Regexp.escape(meth_name)
  found = %r{Document-method:\s#{escaped}\s*?\n((?>.*?\*/))}m.match(@body)
  found && found[1]
end

[Source]

     # File parsers/parse_c.rb, line 459
# Record an attribute defined through rb_define_attr.
#
# var_name::  C variable of the owning class; attributes on classes that
#             are not in @known_classes are ignored
# attr_name:: name of the attribute
# reader::    true when a reader is defined
# writer::    true when a writer is defined
#
# The access string is built from 'R' and 'W', reader first.
def handle_attr(var_name, attr_name, reader, writer)
  access = "#{'R' if reader}#{'W' if writer}"

  class_name = @known_classes[var_name]
  return unless class_name

  owner = find_class(var_name, class_name)
  return unless owner

  doc = find_attr_comment(attr_name)
  doc = mangle_comment(doc) unless doc.empty?

  owner.add_attribute(Attr.new('', attr_name, access, doc))
end

[Source]

     # File parsers/parse_c.rb, line 222
# Register a class or module found in the C source.
#
# var_name::   C variable the class/module is assigned to
# class_mod::  "class" or "module"
# class_name:: Ruby name of the class/module
# parent::     C variable (or name) of the superclass, or nil
# in_module::  C variable of the enclosing class/module, or nil for a
#              top-level definition
#
# An enclosing class/module that has not been seen yet but is listed in
# @known_classes is registered first. If the enclosure cannot be
# resolved, a warning is printed and nothing is registered.
def handle_class_module(var_name, class_mod, class_name, parent, in_module)
  progress(class_mod[0, 1])

  parent_name = @known_classes[parent] || parent

  enclosure = @top_level
  if in_module
    enclosure = @classes[in_module]

    if !enclosure && (enclosing_name = @known_classes[in_module])
      # Known but not yet created: create it now, as a module when the C
      # variable is named rb_m*, otherwise as a class.
      enclosing_kind = in_module =~ /^rb_m/ ? "module" : "class"
      handle_class_module(in_module, enclosing_kind, enclosing_name, nil, nil)
      enclosure = @classes[in_module]
    end

    unless enclosure
      warn("Enclosing class/module '#{in_module}' for #{class_mod} #{class_name} not known")
      return
    end
  end

  if class_mod == "class"
    cm = enclosure.add_class(NormalClass, class_name, parent_name)
    @stats.num_classes += 1
  else
    cm = enclosure.add_module(NormalModule, class_name)
    @stats.num_modules += 1
  end
  cm.record_location(enclosure.toplevel)

  find_class_comment(cm.full_name, cm)

  # Remember the object and its full name under the C variable so that
  # later definitions can refer to it.
  @classes[var_name] = cm
  @known_classes[var_name] = cm.full_name
end

[Source]

     # File parsers/parse_c.rb, line 425
# Record a constant or variable definition found by do_constants.
#
# type::       which rb_define_* function made the definition
#              ("const", "global_const", "variable", ...)
# var_name::   C variable of the owning class/module; definitions on
#              owners that are not in @known_classes are ignored
# const_name:: name of the constant
# definition:: C expression giving its value
#
# The constant's comment is looked up with find_const_comment.
def handle_constants(type, var_name, const_name, definition)
  #@stats.num_constants += 1
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  unless class_obj
    # Name the unresolved owner and what was being defined in it, in the
    # same shape as the warning in handle_class_module.
    warn("Enclosing class/module '#{var_name}' for #{type} #{const_name} not known")
    return
  end

  comment = find_const_comment(type, const_name)
  class_obj.add_constant(Constant.new(const_name, definition, mangle_comment(comment)))
end

Remove ifdefs that would otherwise confuse us

[Source]

     # File parsers/parse_c.rb, line 683
# Remove ifdefs that would otherwise confuse us: every
# "#ifdef HAVE_PROTOTYPES ... #else ... #endif" group is replaced by the
# contents of its #else branch. Returns the new text; +body+ itself is
# not modified.
def handle_ifdefs_in(body)
  prototype_guard = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m
  body.gsub(prototype_guard, '\1')
end

[Source]

     # File parsers/parse_c.rb, line 502
# Record a method definition found by do_methods.
#
# type::        kind of definition ("method", "singleton_method", ...)
# var_name::    C variable of the owning class/module; methods on owners
#               that are not in @known_classes are ignored
# meth_name::   Ruby name of the method
# meth_body::   name of the C function that implements it
# param_count:: arity argument as written in the C source
# source_file:: file (relative to the directory of the parsed file) that
#               holds the C function, or nil when it is in this file
#
# Every call is counted in @stats, even when the owner is unknown. An
# "initialize" method is documented as the singleton method "new". The
# method is added only if find_body succeeds and the method is still
# marked for documentation.
def handle_method(type, var_name, meth_name,
                  meth_body, param_count, source_file = nil)
  progress(".")
  @stats.num_methods += 1

  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  if meth_name == "initialize"
    meth_name = "new"
    type = "singleton_method"
  end

  meth_obj = AnyMethod.new("", meth_name)
  meth_obj.singleton = (type == "singleton_method")

  # A count that is not an integer literal is treated like a negative
  # one: the parameters are shown as "(...)".
  arity = (Integer(param_count) rescue -1)
  meth_obj.params =
    if arity < 0
      "(...)"
    elsif arity == 0
      "()"
    else
      "(" + (1..arity).map { |i| "p#{i}" }.join(", ") + ")"
    end

  c_source =
    if source_file
      # Other files are read once and cached by name.
      path = File.join(@file_dir, source_file)
      (@@known_bodies[source_file] ||= File.read(path))
    else
      @body
    end

  if find_body(meth_body, meth_obj, c_source) && meth_obj.document_self
    class_obj.add_method(meth_obj)
  end
end

[Source]

     # File parsers/parse_c.rb, line 669
# Expand tabs in +body+ to spaces, using the tab width from
# Options.instance, so that text stays aligned to tab stops.
#
# Returns +body+ unchanged when it contains no tab. Otherwise returns a
# new string built from the expanded lines joined with "\n" (a trailing
# newline of the input is not reproduced).
def handle_tab_width(body)
  return body unless body.include?("\t")

  tab_width = Options.instance.tab_width
  expanded = body.split(/\n/).map do |line|
    # Expand one run of tabs per pass. The width of a run depends on the
    # column it starts at, so each later run must be measured against
    # the already expanded text. A single gsub! would measure every run
    # against the original, unexpanded line.
    1 while line.sub!(/\t+/) {
      column = Regexp.last_match.pre_match.length
      ' ' * (tab_width * $&.length - column % tab_width)
    }
    line
  end
  expanded.join("\n")
end

Blank out the /* and */ delimiters and the leading asterisks of a C comment by replacing them with spaces of the same length

[Source]

     # File parsers/parse_c.rb, line 651
# Blank out the C comment markers in +comment+: the first opening /*
# (with any extra asterisks), the first closing */ and the leading
# asterisk of each line are overwritten with spaces of the same length,
# so the remaining text keeps its columns.
#
# +comment+ is modified in place and returned.
def mangle_comment(comment)
  blank_out = lambda { |matched| " " * matched.length }

  comment.sub!(%r{/\*+}, &blank_out)
  comment.sub!(%r{\*+/}, &blank_out)
  comment.gsub!(/^[ \t]*\*/m, &blank_out)
  comment
end

[Source]

     # File parsers/parse_c.rb, line 202
# Write a progress character to the progress stream and flush it
# immediately. Does nothing when the quiet option is set.
def progress(char)
  return if @options.quiet

  @progress.print(char)
  @progress.flush
end

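Neutralize commented-out definitions: on a line with a // comment, the text from // through the last rb_define_ on that line is replaced by //, so that it is not picked up when scanning for classes and methods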

[Source]

     # File parsers/parse_c.rb, line 218
# Neutralize definitions that sit behind a // comment: the text from //
# through the last rb_define_ on the same line is collapsed to //, so
# the later scans no longer see an rb_define_ call there. @body is
# modified in place.
def remove_commented_out_lines
  @body.gsub!(%r{//.*rb_define_}) { '//' }
end

Extract the classes/modules and methods from a C file and return the corresponding top-level object

[Source]

     # File parsers/parse_c.rb, line 188
# Extract the classes/modules and methods from a C file and return the
# corresponding top-level object.
#
# The passes run in a fixed order: commented-out definitions are
# neutralized first, then classes, constants, methods, includes and
# aliases are collected.
def scan
  [:remove_commented_out_lines,
   :do_classes,
   :do_constants,
   :do_methods,
   :do_includes,
   :do_aliases].each { |pass| send(pass) }

  @top_level
end

[Source]

     # File parsers/parse_c.rb, line 209
# Print +msg+ on $stderr, preceded by an empty line (which ends any
# progress output on the current line), and flush the stream.
def warn(msg)
  err = $stderr
  err.puts
  err.puts(msg)
  err.flush
end

[Validate]