# sample/prism/multiplex_constants.rb

# This script indexes the classes and modules within a set of files using the
# saved source functionality.

require "prism"
require "etc"
require "tempfile"

module Indexer
  # A class that implements the #enter functionality so that it can be passed to
  # the various save* APIs. It records every (scope, node_id, field_name) triple
  # it is given so that they can be written back to the parent process.
  class Repository
    # scope   - Array of the constant name parts currently being visited.
    # entries - Array of [scope_name, node_id, field_name] triples, in the order
    #           they were entered.
    attr_reader :scope, :entries

    def initialize
      @scope = []
      @entries = []
    end

    # Run the given block with next_scope appended to the current scope.
    #
    # The previous scope is restored in an ensure clause so that an exception
    # raised by the block (or a non-local exit such as break) cannot leave the
    # repository stuck inside the nested scope.
    #
    # Returns the scope that was in effect before the call.
    def with(next_scope)
      previous_scope = scope
      @scope = previous_scope + next_scope
      yield
      previous_scope
    ensure
      @scope = previous_scope
    end

    # True when nothing has been entered yet.
    def empty?
      entries.empty?
    end

    # Record a node_id/field_name pair under the current scope. The scope is
    # flattened to a "::"-separated name at the time of the call.
    def enter(node_id, field_name)
      entries << [scope.join("::"), node_id, field_name]
    end
  end

  # Walk the AST and, for every class and module definition, save the location
  # of its constant path into the repository under the definition's full name.
  class Visitor < Prism::Visitor
    attr_reader :repository

    def initialize(repository)
      @repository = repository
    end

    def visit_class_node(node)
      record_definition(node)
    end

    def visit_module_node(node)
      record_definition(node)
    end

    private

    # Shared handling for class and module nodes: extend the repository scope
    # with the parts of the constant path, save the path's location, and then
    # descend into the body so that nested definitions are scoped beneath it.
    def record_definition(node)
      constant_path = node.constant_path

      repository.with(constant_path.full_name_parts) do
        constant_path.save_location(repository)
        visit(node.body)
      end
    end
  end

  # Parse the file at filepath and collect its class and module locations.
  # Returns nil when nothing was found; otherwise returns a single
  # "|"-separated string starting with the filepath and followed by the
  # flattened entries, ready to be written to the parent process.
  def self.index(filepath)
    repository = Repository.new
    program = Prism.parse_file(filepath).value
    program.accept(Visitor.new(repository))
    return if repository.empty?

    serialized_entries = repository.entries.join("|")
    "#{filepath}|#{serialized_entries}"
  end
end

# Index the classes and modules of every file matching glob, spreading the
# work over a pool of forked worker processes.
#
# glob  - pattern passed to Dir[] to select the files to index.
# count - number of worker processes to fork. Defaults to one fewer than the
#         number of processors. Values below 1 are raised to 1 so that there
#         is always a worker to hand files to.
#
# Returns a Hash that maps each scope name reported by the workers to an Array
# of the values returned by the relocation repository's #enter.
def index_glob(glob, count = Etc.nprocessors - 1)
  # The default is 0 on a single-processor machine. With no workers the
  # round-robin dispatch below would divide by zero, so always fork at least
  # one.
  worker_count = [count, 1].max

  process_ids = []
  filepath_writers = []
  index_reader, index_writer = IO.pipe

  # For each worker, fork off a process that has access to two pipes. The
  # first pipe is the index_writer, to which it writes all of the results of
  # indexing the various files. The second pipe is the filepath_reader, from
  # which it reads the filepaths that it needs to index.
  worker_count.times do
    filepath_reader, filepath_writer = IO.pipe

    process_ids << fork do
      # fork copies every open descriptor, including the write ends of the
      # filepath pipes that belong to previously forked workers. Close them
      # here; otherwise those workers cannot see end-of-file on their pipe
      # until this process has exited.
      filepath_writers.each(&:close)
      filepath_writer.close
      index_reader.close

      while (filepath = filepath_reader.gets(chomp: true))
        results = Indexer.index(filepath)
        index_writer.puts(results) if results
      end
    end

    filepath_reader.close
    filepath_writers << filepath_writer
  end

  # The parent only reads from the index pipe. Closing its copy of the write
  # end lets the reader below see end-of-file once every worker has exited.
  index_writer.close

  # In a separate thread, write all of the filepaths to the various worker
  # processes. This is done in a separate thread since puts will eventually
  # block when each of the pipe buffers fills up. We write in a round-robin
  # fashion to the various workers. This could be improved using a work-stealing
  # algorithm, but is fine if you don't end up having a ton of variety in the
  # size of your files.
  writer_thread =
    Thread.new do
      Dir[glob].each_with_index do |filepath, position|
        filepath_writers[position % worker_count].puts(filepath)
      end
    end

  index = Hash.new { |hash, key| hash[key] = [] }

  # In a separate thread, read all of the results from the various worker
  # processes and store them in the index. This is done in a separate thread so
  # that reads and writes can be interleaved. This is important so that the
  # index pipe doesn't fill up and block the writer.
  reader_thread =
    Thread.new do
      while (line = index_reader.gets(chomp: true))
        # Each line is the filepath followed by flattened
        # (name, node_id, field_name) triples, all separated by "|".
        filepath, *entries = line.split("|")
        repository = Prism::Relocation.filepath(filepath).filepath.lines.code_unit_columns(Encoding::UTF_16LE).leading_comments

        entries.each_slice(3) do |(name, node_id, field_name)|
          index[name] << repository.enter(Integer(node_id), field_name.to_sym)
        end
      end
    end

  # Closing the filepath pipes once everything has been written is what tells
  # the workers to stop reading and exit.
  writer_thread.join
  filepath_writers.each(&:close)

  reader_thread.join
  index_reader.close

  process_ids.each { |process_id| Process.wait(process_id) }
  index
end

# Index every Ruby file beneath the lib directory that sits two levels above
# the directory containing this script.
lib_glob = File.expand_path("../../lib/**/*.rb", __dir__)
index_glob(lib_glob)