LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
data_reader_node2vec.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_DATA_READERS_NODE2VEC_HPP_INCLUDED
28 #define LBANN_DATA_READERS_NODE2VEC_HPP_INCLUDED
29 
30 #include "data_reader.hpp"
31 #ifdef LBANN_HAS_LARGESCALE_NODE2VEC
32 
33 namespace lbann {
34 
35 // Note (tym 4/8/20): Including largescale_node2vec in this header
36 // causes multiple definitions (I suspect it instantiates an object
37 // somewhere). However, node2vec_reader needs to store
38 // largescale_node2vec classes in unique_ptrs. To get around this, we
39 // implement derived classes in the source file and forward declare
40 // them in this header.
// Forward declarations only: these three types are incomplete in this
// header. Per the note preceding this namespace, they are implemented in
// the source file as derived classes of largescale_node2vec types, so that
// node2vec_reader can hold them in unique_ptrs without this header
// including largescale_node2vec.
41 namespace node2vec_reader_impl {
42 class DistributedDatabase;
43 class EdgeWeightData;
44 class RandomWalker;
45 } // namespace node2vec_reader_impl
46 
// Data reader for node2vec. Derives from generic_data_reader and overrides
// its type/dimension queries, load(), and the protected fetch hooks.
//
// The class is neither copy-constructible nor copy-assignable (both
// operations are deleted below). Its three unique_ptr members point to
// node2vec_reader_impl types that are only forward declared in this header,
// so nothing about their interfaces is visible here.
//
// NOTE(review): this header names std::string, std::vector, std::deque,
// std::unordered_map and std::unique_ptr but only includes
// "data_reader.hpp"; presumably the standard headers arrive transitively
// through it -- confirm, or include them directly.
62 class node2vec_reader : public generic_data_reader
63 {
64 public:
  // Constructor. Each parameter has a same-named, m_-prefixed private member
  // declared at the end of the class; presumably each argument is stored
  // there -- confirm in the source file.
  // NOTE(review): return_param / inout_param look like node2vec's return (p)
  // and in-out (q) walk parameters -- TODO confirm.
65  node2vec_reader(std::string graph_file,
66  size_t epoch_size,
67  size_t walk_length,
68  double return_param,
69  double inout_param,
70  size_t num_negative_samples);
  // Copying is disabled.
71  node2vec_reader(const node2vec_reader&) = delete;
72  node2vec_reader& operator=(const node2vec_reader&) = delete;
  // Declared without an inline body. Presumably defined in the source file,
  // where the node2vec_reader_impl types are complete, as destroying the
  // unique_ptr members requires.
73  ~node2vec_reader() override;
  // NOTE(review): the copy constructor is deleted, so it is not clear from
  // this header how copy() can produce a new reader -- check the definition.
74  node2vec_reader* copy() const override;
75 
  // Overrides of generic_data_reader queries; their implementations are not
  // visible in this header.
76  std::string get_type() const override;
77 
78  const std::vector<int> get_data_dims() const override;
79  int get_num_labels() const override;
80  int get_linearized_data_size() const override;
81  int get_linearized_label_size() const override;
82 
83  void load() override;
84 
85 protected:
  // Protected fetch hooks overridden from generic_data_reader.
86  bool fetch_data_block(CPUMat& X,
87  El::Int block_offset,
88  El::Int block_stride,
89  El::Int mb_size,
90  El::Matrix<El::Int>& indices_fetched) override;
91  bool fetch_label(CPUMat& Y, int data_id, int mb_idx) override;
92 
93 private:
  // Takes a walk count and a locked_io_rng_ref and returns a vector of
  // size_t vectors -- presumably one inner vector of vertex indices per
  // walk; TODO confirm against the definition.
99  std::vector<std::vector<size_t>> run_walker(size_t num_walks,
100  const locked_io_rng_ref&);
101 
  // Takes no arguments and returns nothing. Presumably recomputes
  // m_local_vertex_noise_distribution from the visit counts -- TODO confirm.
108  void update_noise_distribution();
109 
  // Owning pointers to the forward-declared implementation types.
111  std::unique_ptr<node2vec_reader_impl::DistributedDatabase>
112  m_distributed_database;
114  std::unique_ptr<node2vec_reader_impl::EdgeWeightData> m_edge_weight_data;
116  std::unique_ptr<node2vec_reader_impl::RandomWalker> m_random_walker;
117 
  // Double-ended queue of size_t vectors; by its name, a cache of walks.
127  std::deque<std::vector<size_t>> m_walks_cache;
128 
  // NOTE(review): by their names, these two appear to map between local and
  // global vertex indices (vector in one direction, hash map in the other);
  // which index space is the key is not visible here -- verify.
130  std::vector<size_t> m_local_vertex_global_indices;
135  std::unordered_map<size_t, size_t> m_local_vertex_local_indices;
136 
  // Per-vertex vectors; presumably indexed by local vertex index -- confirm.
140  std::vector<size_t> m_local_vertex_visit_counts;
150  std::vector<double> m_local_vertex_noise_distribution;
151 
  // Counters, both initialized to zero by their in-class initializers.
157  size_t m_total_visit_count{0};
161  size_t m_noise_visit_count{0};
162 
  // Members named after the constructor's parameters (see the constructor).
168  std::string m_graph_file;
169 
177  size_t m_epoch_size;
179  size_t m_walk_length;
181  double m_return_param;
183  double m_inout_param;
185  size_t m_num_negative_samples;
186 };
187 
188 } // namespace lbann
189 
190 #endif // LBANN_HAS_LARGESCALE_NODE2VEC
191 #endif // LBANN_DATA_READERS_NODE2VEC_HPP_INCLUDED
void load(std::string const &pbuf_filename, google::protobuf::Message &msg)
Fill the protobuf message from a binary file.
El::Matrix< DataType, El::Device::CPU > CPUMat
Definition: base.hpp:116