def parallel_extract(self):
    """Fan out embedding extraction across GPUs, one worker process per shard.

    For every ``(extract_file, save_to_dir)`` pair in
    ``self.parallel_extract_list``: read the CSV manifest, split it into
    ``self.world_size`` shards, grab that many idle GPUs, and run
    ``self._parallel_extract`` in a separate process per shard. Blocks until
    all workers finish, then prints the wall-clock time in minutes.

    Raises:
        RuntimeError: if fewer than ``self.world_size`` idle GPUs are
            available. (Previously the unassigned shards were silently
            skipped, producing an incomplete extraction that looked
            successful.)
    """
    t0 = time.time()
    for extract_file, save_to_dir in self.parallel_extract_list:
        df = pd.read_csv(extract_file)
        dfs = np.array_split(df, self.world_size)
        # maxMemory=0.02: only GPUs with <= 2% memory in use count as free.
        gpu_ids = GPUtil.getAvailable(maxMemory=0.02, limit=self.world_size)
        if len(gpu_ids) < self.world_size:
            # Without this guard, shards beyond len(gpu_ids) were never
            # processed because the loop iterates over gpu_ids.
            raise RuntimeError(
                f'need {self.world_size} idle GPUs but only '
                f'{len(gpu_ids)} are available'
            )
        processes = []
        for rank, gpu_id in enumerate(gpu_ids):
            p = Process(
                target=self._parallel_extract,
                args=(f'{save_to_dir}/res_{rank}.h5', dfs[rank], gpu_id, rank),
            )
            p.start()
            print(f'process {rank} has started')
            processes.append(p)
        for p in processes:
            p.join()
    print(f'total time is {(time.time() - t0) / 60}')
def _parallel_extract(self, save_to_dir, csv_file, gpu_id, rank):
    """Worker body: extract embeddings for one manifest shard on one GPU.

    Args:
        save_to_dir: output HDF5 file path for this worker
            (e.g. ``.../res_<rank>.h5``).
        csv_file: this worker's shard of the manifest — presumably a pandas
            DataFrame slice from the parent's ``np.array_split``; verify
            against the caller.
        gpu_id: CUDA device index this worker is pinned to.
        rank: worker index (used by the caller to name the output file).
    """
    # Pin the shared embedding model to this worker's device.
    self.device = torch.device(gpu_id)
    self.params["embedding_model"].to(self.device)
    loader = self.dataio_prep(csv_file)
    with h5py.File(save_to_dir, self.mode) as fw:
        # _extraction must return exactly four sequences; unpack first so a
        # wrong arity fails loudly before anything is written.
        embs, frame_counts, speaker_ids, speaker_paths = self._extraction(
            save_to_dir, loader
        )
        fw['X'] = embs
        fw['n_frames'] = frame_counts
        fw['spk_ids'] = speaker_ids
        fw['spk_path'] = speaker_paths
    print(f'saving embedding')