【darknet源码】:检测任务训练过程源码追踪
1.src/darknet.c/main()
通过解析train.sh中的批处理指令,遇到detector进入run_detector函数。
else if (0 == strcmp(argv[1], "detector")){
run_detector(argc, argv);
2.src/detector.c/run_detector()
通过解析train.sh的批处理指令,遇到train进入train_detector函数。
else if(0==strcmp(argv[2], "train"))
train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
run_detector函数代码
/*
** Dispatcher for all "detector" subcommands (test / train / valid / valid2 /
** recall / demo). Parses the command-line flags shared by the subcommands,
** resolves which GPUs to use, then forwards to the matching handler.
** Expected layout: ./darknet detector <mode> <datacfg> <cfg> [weights] [file]
*/
void run_detector(int argc, char **argv)
{
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    int avg = find_int_arg(argc, argv, "-avg", 3);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    // GPU selection: "-gpus 0,1,2" lists the card ids to use (comma separated).
    // Without the flag a single card (gpu_index, default 0) is assumed. Even in
    // a CPU-only build ngpus ends up 1: it doubles as the number of network
    // replicas built by train_detector, so it is needed either way.
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        // Number of GPUs = number of commas + 1.
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        if(!gpus){
            fprintf(stderr, "calloc failed\n");
            return;
        }
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            // BUGFIX: the original advanced with strchr(gpu_list, ',') + 1
            // unconditionally; on the last id strchr returns NULL and NULL+1
            // is undefined behavior. Stop once there is no next comma.
            char *comma = strchr(gpu_list, ',');
            if(!comma) break;
            gpu_list = comma + 1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");
    int fullscreen = find_arg(argc, argv, "-fullscreen");
    int width = find_int_arg(argc, argv, "-w", 0);
    int height = find_int_arg(argc, argv, "-h", 0);
    int fps = find_int_arg(argc, argv, "-fps", 0);

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
    }
}
3.src/detector.c/train_detector()
该函数中执行了整个的训练过程,该函数中对训练而言最重要的部分有这么几个:
- load_network:解析cfg参数,导入权重。
- load_data:导入训练数据。
- train_network:执行一次网络的前向、反向与权重更新。
下面依次介绍这三个函数功能。
train_detector函数代码
/* 图像检测网络训练函数(针对图像检测的网络训练)
** 输入: datacfg    训练数据描述信息文件路径及名称
**       cfgfile    神经网络结构配置文件路径及名称
**       weightfile 预训练参数文件路径及名称
**       gpus       GPU卡号集合(比如使用1块GPU,那么里面只含0元素,默认使用0卡号GPU;如果使用4块GPU,那么含有0,1,2,3四个元素;如果不使用GPU,那么为空指针)
**       ngpus      使用GPU的块数,使用一块GPU和不使用GPU时,ngpus都等于1
**       clear      是否清零已训练图片计数(net.seen),从第0批次重新开始训练
*/
/*
** Runs the full detection-training loop.
**   datacfg    - path to the data-description file (train list, backup dir, ...)
**   cfgfile    - path to the network .cfg file
**   weightfile - optional pretrained weights file (NULL to train from scratch)
**   gpus       - array of GPU card ids, one per replica (ignored in CPU builds)
**   ngpus      - number of network replicas (1 for a single GPU or CPU-only)
**   clear      - nonzero resets the "images seen" counter so the batch count
**                (and learning-rate schedule) restarts from zero
**
** Note: this restores the pointer types (`char *`, `int *`) and the `#ifdef GPU`
** preprocessor directives that the blog extraction had stripped; without them
** the excerpt does not compile and contradicts the call site in run_detector.
*/
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    // Read the data-configuration file (key/value options).
    list *options = read_data_cfg(datacfg);
    // Training image list; falls back to "data/train.list". The list file is
    // typically produced by scripts/voc_label.py from a VOC-style dataset.
    char *train_images = option_find_str(options, "train", "data/train.list");
    // Directory where checkpoint weight files are written.
    char *backup_directory = option_find_str(options, "backup", "/backup/");
    srand(time(0));
    // Short name of the cfg file (e.g. "yolo" from cfg/yolo.cfg), used only
    // for log output and checkpoint file names.
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    // One network replica per GPU (ngpus == 1 when not using multiple GPUs).
    network *nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    int i;
    // Build ngpus identical networks: same seed, same cfg, same initial weights.
    for(i = 0; i < ngpus; ++i){
        // Re-seed so every replica draws identical random initializations.
        srand(seed);
#ifdef GPU
        // Make gpus[i] the active CUDA device for this replica.
        cuda_set_device(gpus[i]);
#endif
        nets[i] = parse_network_cfg(cfgfile);
        if(weightfile){
            load_weights(&nets[i], weightfile);
        }
        if(clear) *nets[i].seen = 0;
        // Scale the learning rate by the replica count (gradients are
        // averaged across GPUs, so the effective batch is ngpus times larger).
        nets[i].learning_rate *= ngpus;
    }
    srand(time(0));
    network net = nets[0];

    // Images consumed per weight update across all replicas.
    int imgs = net.batch * net.subdivisions * ngpus;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    data train, buffer;

    layer l = net.layers[net.n - 1];   // detection layer is the last layer

    int classes = l.classes;
    float jitter = l.jitter;

    list *plist = get_paths(train_images);
    char **paths = (char **)list_to_array(plist);

    // Arguments handed to the asynchronous data-loading threads.
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = l.max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    args.threads = 8;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    // Start prefetching the first batch in the background.
    pthread_t load_thread = load_data(args);
    clock_t time;
    int count = 0;
    while(get_current_batch(net) < net.max_batches){
        // Multi-scale training: every 10 iterations pick a new square input
        // size in {320, ..., 608} (multiples of 32); pin 608 near the end.
        if(l.random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
            if (get_current_batch(net)+200 > net.max_batches) dim = 608;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;

            // Discard the batch prefetched at the old resolution and restart
            // loading at the new one.
            pthread_join(load_thread, 0);
            train = buffer;
            free_data(train);
            load_thread = load_data(args);

            for(i = 0; i < ngpus; ++i){
                resize_network(nets + i, dim, dim);
            }
            net = nets[0];
        }
        time=clock();
        // Wait for the prefetched batch, then immediately queue the next one.
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        time=clock();
        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            // Multi-GPU path: train all replicas and sync every 4 iterations.
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if (avg_loss < 0) avg_loss = loss;
        // Exponential moving average of the loss for smoother logging.
        avg_loss = avg_loss*.9 + loss*.1;

        i = get_current_batch(net);
        printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
        // Rolling checkpoint, overwritten every 1000 batches.
        if(i%1000==0){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        // Numbered snapshots: every 100 batches early on, then every 10000.
        if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
    }
#ifdef GPU
    if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
    // Final weights after max_batches is reached.
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}
3.1 src/detector.c/train_detector()/load_network()
额。。。好吧这个版本没有load_network函数,因为我之前看的是yolov3版本的darknet,那个版本把解析cfg文件的操作封装在load_network()函数里了。而在这里并没有封装,但两者基本没什么差别,代码分别如下:
old_version
nets[i] = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&nets[i], weightfile);
}
if(clear) *nets[i].seen = 0;
nets[i].learning_rate *= ngpus;
new_version
而包含load_network的函数源码如下:
nets[i] = load_network(cfgfile, weightfile, clear);
nets[i]->learning_rate *= ngpus;
//////src/network.c
/*
** Builds a network from a cfg file, optionally loads pretrained weights,
** and optionally zeroes the seen-images counter. Returns the new network;
** the caller owns it.
*/
network *load_network(char *cfg, char *weights, int clear)
{
    network *net = parse_network_cfg(cfg);
    // Only load weights when a non-empty path was supplied.
    int have_weights = (weights != 0) && (weights[0] != '\0');
    if(have_weights){
        load_weights(net, weights);
    }
    if(clear){
        *net->seen = 0;
    }
    return net;
}
这两个版本想表达的意思是一致的,就是用parse_network_cfg解析cfg参数,用load_weight导入权重。
src/parser.c/parse_network_cfg()
具体读取cfg的操作请看:【darknet源码】:读取cfg文件源码追踪
具体解析cfg的解析请看:【darknet源码】:解析cfg文件源码追踪
src/parser.c/load_weights()
具体导入权重代码的操作请看:【darknet源码】:导入各个层的权重
3.2 src/detector.c/train_detector()/load_data()
具体导入数据代码的操作请看:【darknet源码】:导入训练数据
3.3 src/detector.c/train_detector()/train_network()
具体训练网络代码的操作请看:【darknet源码】:网络前向,反向,更新源码
还没有评论,来说两句吧...