27int main(
int argc,
char **argv) {
28 if (argc < 3 ||
"-h" == std::string(argv[1]) ||
"--help" == std::string(argv[1])) {
29 std::cerr <<
"\nusage: hadd [-a] [-f] [-f[0-9]] [-fk] [-ff] [-k] [-O] [-T] [-v V] "
30 <<
"[-j J] [-dbg] [-d D] [-n N] [-cachesize CACHESIZE] [-experimental-io-features "
31 <<
"EXPERIMENTAL_IO_FEATURES] TARGET SOURCES \n\n"
32 <<
"This program will add histograms, trees and other objects from a list\n"
33 <<
"of ROOT files and write them to a target ROOT file. The target file is\n"
34 <<
"newly created and must not exist, or if -f (\" force \") is given, must\n"
35 <<
"not be one of the source files.\n\n"
36 <<
"It is copied from ROOT source file and linked artemis library\n";
37 return (argc == 2 && (
"-h" == std::string(argv[1]) ||
"--help" == std::string(argv[1])))
42 ROOT::TIOFeatures features;
43 Bool_t append = kFALSE;
44 Bool_t force = kFALSE;
45 Bool_t skip_errors = kFALSE;
46 Bool_t reoptimize = kFALSE;
47 Bool_t noTrees = kFALSE;
48 Bool_t keepCompressionAsIs = kFALSE;
49 Bool_t useFirstInputCompression = kFALSE;
50 Bool_t multiproc = kFALSE;
51 Bool_t debug = kFALSE;
52 Int_t maxopenedfiles = 0;
56 gSystem->GetSysInfo(&s);
57 auto nProcesses = s.fCpus;
58 auto workingDir = gSystem->TempDirectory();
62 for (
int a = 1; a < argc; ++a) {
63 if (strcmp(argv[a],
"-T") == 0) {
66 }
else if (strcmp(argv[a],
"-a") == 0) {
69 }
else if (strcmp(argv[a],
"-f") == 0) {
72 }
else if (strcmp(argv[a],
"-k") == 0) {
75 }
else if (strcmp(argv[a],
"-O") == 0) {
78 }
else if (strcmp(argv[a],
"-dbg") == 0) {
82 }
else if (strcmp(argv[a],
"-d") == 0) {
83 if (a + 1 != argc && argv[a + 1][0] !=
'-') {
84 if (gSystem->AccessPathName(argv[a + 1])) {
85 std::cerr <<
"Error: could not access the directory specified: " << argv[a + 1]
86 <<
". We will use the system's temporal directory.\n";
88 workingDir = argv[a + 1];
93 std::cout <<
"-d: no directory specified. We will use the system's temporal directory.\n";
96 }
else if (strcmp(argv[a],
"-j") == 0) {
98 if (a + 1 != argc && argv[a + 1][0] !=
'-') {
101 for (
char *c = argv[a + 1]; *c !=
'\0'; ++c) {
104 std::cerr <<
"Error: could not parse the number of processes to run in parallel passed after -j: "
105 << argv[a + 1] <<
". We will use the system maximum.\n";
111 request = strtol(argv[a + 1], 0, 10);
112 if (request < kMaxLong && request >= 0) {
113 nProcesses = (Int_t)request;
116 std::cout <<
"Parallelizing with " << nProcesses <<
" processes.\n";
118 std::cerr <<
"Error: could not parse the number of processes to use passed after -j: " << argv[a + 1]
119 <<
". We will use the default value (number of logical cores).\n";
125 }
else if (strcmp(argv[a],
"-cachesize=") == 0) {
127 static constexpr size_t arglen = std::char_traits<char>::length(
"-cachesize=");
128 auto parseResult = ROOT::FromHumanReadableSize(argv[a] + arglen, size);
129 if (parseResult == ROOT::EFromHumanReadableSize::kParseFail) {
130 std::cerr <<
"Error: could not parse the cache size passed after -cachesize: "
131 << argv[a + 1] <<
". We will use the default value.\n";
132 }
else if (parseResult == ROOT::EFromHumanReadableSize::kOverflow) {
134 const char *munit =
nullptr;
135 ROOT::ToHumanReadableSize(INT_MAX,
false, &m, &munit);
136 std::cerr <<
"Error: the cache size passed after -cachesize is too large: "
137 << argv[a + 1] <<
" is greater than " << m << munit
138 <<
". We will use the default value.\n";
140 cacheSize =
"cachesize=";
141 cacheSize.Append(argv[a] + 1);
144 }
else if (strcmp(argv[a],
"-cachesize") == 0) {
146 std::cerr <<
"Error: no cache size number was provided after -cachesize.\n";
149 auto parseResult = ROOT::FromHumanReadableSize(argv[a + 1], size);
150 if (parseResult == ROOT::EFromHumanReadableSize::kParseFail) {
151 std::cerr <<
"Error: could not parse the cache size passed after -cachesize: "
152 << argv[a + 1] <<
". We will use the default value.\n";
153 }
else if (parseResult == ROOT::EFromHumanReadableSize::kOverflow) {
155 const char *munit =
nullptr;
156 ROOT::ToHumanReadableSize(INT_MAX,
false, &m, &munit);
157 std::cerr <<
"Error: the cache size passed after -cachesize is too large: "
158 << argv[a + 1] <<
" is greater than " << m << munit
159 <<
". We will use the default value.\n";
163 cacheSize =
"cachesize=";
164 cacheSize.Append(argv[a + 1]);
170 }
else if (!strcmp(argv[a],
"-experimental-io-features")) {
172 std::cerr <<
"Error: no IO feature was specified after -experimental-io-features; ignoring\n";
174 std::stringstream ss;
178 while (std::getline(ss, item,
',')) {
179 if (!features.Set(item)) {
180 std::cerr <<
"Ignoring unknown feature request: " << item << std::endl;
185 }
else if (strcmp(argv[a],
"-n") == 0) {
187 std::cerr <<
"Error: no maximum number of opened was provided after -n.\n";
189 Long_t request = strtol(argv[a + 1], 0, 10);
190 if (request < kMaxLong && request >= 0) {
191 maxopenedfiles = (Int_t)request;
195 std::cerr <<
"Error: could not parse the max number of opened file passed after -n: " << argv[a + 1] <<
". We will use the system maximum.\n";
199 }
else if (strcmp(argv[a],
"-v") == 0) {
200 if (a + 1 == argc || argv[a + 1][0] ==
'-') {
206 Bool_t hasFollowupNumber = kTRUE;
207 for (
char *c = argv[a + 1]; *c !=
'\0'; ++c) {
210 hasFollowupNumber = kFALSE;
214 if (hasFollowupNumber) {
215 Long_t request = strtol(argv[a + 1], 0, 10);
216 if (request < kMaxLong && request >= 0) {
217 verbosity = (Int_t)request;
222 std::cerr <<
"Error: could not parse the verbosity level passed after -v: " << argv[a + 1] <<
". We will use the default value (99).\n";
227 }
else if (argv[a][0] ==
'-') {
229 if (force && argv[a][1] ==
'f') {
231 std::cerr <<
"Error: Using option " << argv[a] <<
" more than once is not supported.\n";
235 const char *prefix =
"";
236 if (argv[a][1] ==
'f' && argv[a][2] ==
'k') {
239 keepCompressionAsIs = kTRUE;
242 if (argv[a][1] ==
'f' && argv[a][2] ==
'f') {
245 useFirstInputCompression = kTRUE;
246 if (argv[a][3] !=
'\0') {
247 std::cerr <<
"Error: option -ff should not have any suffix: " << argv[a] <<
" (suffix has been ignored)\n";
251 for (
int alg = 0; !useFirstInputCompression && alg <= 5; ++alg) {
252 for (
int j = 0; j <= 9; ++j) {
253 const int comp = (alg * 100) + j;
254 snprintf(ft, 7,
"-f%s%d", prefix, comp);
255 if (!strcmp(argv[a], ft)) {
265 std::cerr <<
"Error: option " << argv[a] <<
" is not a supported option.\n";
268 }
else if (!outputPlace) {
273 gSystem->Load(
"libTreePlayer");
275 const char *targetname = 0;
277 targetname = argv[outputPlace];
279 targetname = argv[ffirst - 1];
283 std::cout <<
"chadd Target file: " << targetname << std::endl;
286 TFileMerger fileMerger(kFALSE, kFALSE);
287 fileMerger.SetMsgPrefix(
"chadd");
288 fileMerger.SetPrintLevel(verbosity - 1);
289 if (maxopenedfiles > 0) {
290 fileMerger.SetMaxOpenedFiles(maxopenedfiles);
295 std::vector<std::string> allSubfiles;
296 for (
int a = ffirst; a < argc; ++a) {
297 if (a == outputPlace)
299 if (argv[a] && argv[a][0] ==
'@') {
300 std::ifstream indirect_file(argv[a] + 1);
301 if (!indirect_file.is_open()) {
302 std::cerr <<
"chadd could not open indirect file " << (argv[a] + 1) << std::endl;
307 while (indirect_file) {
308 if (std::getline(indirect_file, line) && line.length()) {
309 if (gSystem->AccessPathName(line.c_str(), kReadPermission) == kTRUE) {
310 std::cerr <<
"chadd could not validate the file name \"" << line <<
"\" within indirect file "
311 << (argv[a] + 1) << std::endl;
315 allSubfiles.emplace_back(line);
320 const std::string line = argv[a];
321 if (gSystem->AccessPathName(line.c_str(), kReadPermission) == kTRUE) {
322 std::cerr <<
"chadd could not validate argument \"" << line <<
"\" as input file " << std::endl;
326 allSubfiles.emplace_back(line);
329 if (allSubfiles.empty()) {
330 std::cerr <<
"chadd could not find any valid input file " << std::endl;
335 if (useFirstInputCompression || keepCompressionAsIs) {
337 TFile *firstInput = TFile::Open(allSubfiles.front().c_str());
338 if (firstInput && !firstInput->IsZombie())
339 newcomp = firstInput->GetCompressionSettings();
341 newcomp = ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault;
343 fileMerger.SetMergeOptions(TString(
"first_source_compression"));
345 newcomp = ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault;
346 fileMerger.SetMergeOptions(TString(
"default_compression"));
350 if (keepCompressionAsIs && !reoptimize)
351 std::cout <<
"chadd compression setting for meta data: " << newcomp <<
'\n';
353 std::cout <<
"chadd compression setting for all output: " << newcomp <<
'\n';
356 if (!fileMerger.OutputFile(targetname,
"UPDATE", newcomp)) {
357 std::cerr <<
"chadd error opening target file for update :" << argv[ffirst - 1] <<
"." << std::endl;
360 }
else if (!fileMerger.OutputFile(targetname, force, newcomp)) {
361 std::cerr <<
"chadd error opening target file (does " << argv[ffirst - 1] <<
" exist?)." << std::endl;
363 std::cerr <<
"Pass \"-f\" argument to force re-creation of output file." << std::endl;
367 auto step = (allSubfiles.size() + nProcesses - 1) / nProcesses;
368 if (multiproc && step < 3) {
371 nProcesses = (allSubfiles.size() + step - 1) / step;
372 std::cout <<
"Each process should handle at least 3 files for efficiency.";
373 std::cout <<
" Setting the number of processes to: " << nProcesses << std::endl;
378 std::vector<std::string> partialFiles;
386 auto partialTail = uuid.AsString();
387 for (
auto i = 0; (i * step) < allSubfiles.size(); i++) {
388 std::stringstream buffer;
389 buffer << workingDir <<
"/partial" << i <<
"_" << partialTail <<
".root";
390 partialFiles.emplace_back(buffer.str());
395 auto mergeFiles = [&](TFileMerger &merger) {
397 merger.SetFastMethod(kFALSE);
399 if (!keepCompressionAsIs && merger.HasCompressionChange()) {
401 std::cout <<
"chadd Sources and Target have different compression settings\n";
402 std::cout <<
"chadd merging will be slower" << std::endl;
405 merger.SetNotrees(noTrees);
406 merger.SetMergeOptions(TString(merger.GetMergeOptions()) +
" " + cacheSize);
407 merger.SetIOFeatures(features);
410 status = merger.PartialMerge(TFileMerger::kIncremental | TFileMerger::kAll);
412 status = merger.Merge();
416 auto sequentialMerge = [&](TFileMerger &merger,
int start,
int nFiles) {
417 for (
auto i = start; i < (start + nFiles) && i < static_cast<int>(allSubfiles.size()); i++) {
418 if (!merger.AddFile(allSubfiles[i].c_str())) {
420 std::cerr <<
"chadd skipping file with error: " << allSubfiles[i] << std::endl;
422 std::cerr <<
"chadd exiting due to error in " << allSubfiles[i] << std::endl;
427 return mergeFiles(merger);
430 auto parallelMerge = [&](
int start) {
431 TFileMerger mergerP(kFALSE, kFALSE);
432 mergerP.SetMsgPrefix(
"chadd");
433 mergerP.SetPrintLevel(verbosity - 1);
434 if (maxopenedfiles > 0) {
435 mergerP.SetMaxOpenedFiles(maxopenedfiles / nProcesses);
437 if (!mergerP.OutputFile(partialFiles[start / step].c_str(), newcomp)) {
438 std::cerr <<
"chadd error opening target partial file" << std::endl;
441 return sequentialMerge(mergerP, start, step);
444 auto reductionFunc = [&]() {
445 for (
const auto &pf : partialFiles) {
446 fileMerger.AddFile(pf.c_str());
448 return mergeFiles(fileMerger);
455 ROOT::TProcessExecutor p(nProcesses);
456 auto res = p.Map(parallelMerge, ROOT::TSeqI(0, allSubfiles.size(), step));
457 status = std::accumulate(res.begin(), res.end(), 0U) == partialFiles.size();
459 status = reductionFunc();
461 std::cout <<
"chadd failed at the parallel stage" << std::endl;
464 for (
const auto &pf : partialFiles) {
465 gSystem->Unlink(pf.c_str());
469 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
472 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
476 if (verbosity == 1) {
477 std::cout <<
"chadd merged " << allSubfiles.size() <<
" (" << fileMerger.GetMergeList()->GetEntries()
478 <<
") input (partial) files into " << targetname <<
".\n";
482 if (verbosity == 1) {
483 std::cout <<
"chadd failure during the merge of " << allSubfiles.size() <<
" ("
484 << fileMerger.GetMergeList()->GetEntries() <<
") input (partial) files into " << targetname <<
".\n";