遇到的问题

1.ImportError: cannot import name ‘DtypeArg’ from ‘pandas._typing’

ImportError：无法从 'pandas._typing' 导入名称 'DtypeArg' - 我爱学习网 (5axxw.com)

使用 pip list 查看当前安装的 pandas 版本。

卸载 pandas 后重新安装即可。

2. File “/home/workspace/feifeizhang/anconda/lib/python3.7/site-packages/pandas/core/indexes/base.py”, line 3363, in get_loc raise KeyError(key) from err KeyError: ‘left_eye_coord’

self.le_coord_list = (root_dir + "/" + self.anno["left_eye_coord"]).tolist()
self.re_coord_list = (root_dir + "/" + self.anno["right_eye_coord"]).tolist()

源代码中这两行被注释掉了，但是如果注释掉会出现

le_coor = np.load(self.le_coord_list[idx])

AttributeError: ‘GazePointAllDataset’ object has no attribute ‘le_coord_list’

解决办法

已经发现是数据组织结构的问题。看代码gaze_dataset.py发现数据组织格式有问题，运行GAZE/RGBD-Gaze-master/code/data/data_check.py时报错：

Traceback (most recent call last):
File “/home/workspace/feifeizhang/GAZE/RGBD-Gaze-master/code/data/data_check.py”, line 1, in <module>
from data.gaze_dataset import GazePointAllDataset
ModuleNotFoundError: No module named ‘data’

看一下coord的.npy文件，读出来。或者发邮件吧。

3.RuntimeError: expand(torch.cuda.FloatTensor{[2, 1]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (2)

Traceback (most recent call last):
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 666, in <module>
trainer.train_base(epochs= total_epochs, lr=learning_rate,use_refined_depth=True)
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 118, in train_base
self._train_base_epoch()
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 411, in _train_base_epoch
left_eye_info[j, 2] = th.median(cur_depth).item() * face_factor
RuntimeError: expand(torch.cuda.FloatTensor{[2, 1]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (2)

非常奇怪的是，当我把batchsize设置为1时，此问题消失了orz

if self.temps.use_refined_depth:
    with th.no_grad():
        left_eye_bbox[:, :2] -= face_bbox[:, :2]
        left_eye_bbox[:, 2:] -= face_bbox[:, :2]
        right_eye_bbox[:, :2] -= face_bbox[:, :2]
        right_eye_bbox[:, 2:] -= face_bbox[:, :2]#减去人脸框左上角坐标，相当于得到眼睛在face中的位置
        left_eye_bbox = th.clamp(face_factor * left_eye_bbox, min=0, max=223)#用clamp把坐标截断到[0, 223]范围内
        right_eye_bbox = th.clamp(face_factor * right_eye_bbox, min=0, max=223)

    for j, lb in enumerate(left_eye_bbox):
        #left_eye_bbox torchsize[2,4]
        #refined_depth torchsize[2,1,224,224]
        cur_depth = refined_depth[j, :, int(lb[1]):int(lb[3]), int(lb[0]):int(lb[2])]
        #cur_depth[1,86,86]
        left_eye_info[j, 2] = th.median(cur_depth).item() * face_factor
        #left_eye_info  torchsize[2,3]
    for j, rb in enumerate(right_eye_bbox):
        cur_depth = refined_depth[j, :, int(rb[1]):int(rb[3]), int(rb[0]):int(rb[2])]
        right_eye_info[j, 2] = th.median(cur_depth).item() * face_factor

        #打印出维度，查看结果
        print("the shape of depth:",cur_depth.shape)
        print("the shape of left_eye_info :",left_eye_info.shape)
        print("left_eye_info[j, 2]",left_eye_info[j, 2])
        a= th.median(cur_depth).item() * face_factor
        print("median depth:",a)
        print("median shape",a.shape)
>>the shape of depth: torch.Size([1, 89, 90])
>>the shape of left_eye_info : torch.Size([2, 3])
>>left_eye_info[j, 2] tensor(0.4084, device='cuda:0')
>>median depth: tensor([[0.2566],
        [0.3694]], device='cuda:0')
>>median shape torch.Size([2, 1])

the shape of depth: torch.Size([1, 88, 88])
the shape of left_eye_info : torch.Size([16, 3])
left_eye_info[j, 2] tensor(0.9088, device=’cuda:0’)
median depth: tensor([[0.6016],
[0.6016],
[0.5006],
[0.5006],
[0.6016],
[0.6016],
[0.5006],
[0.5025],
[0.5006],
[0.4179],
[0.4179],
[0.5025],
[0.3485],
[0.5006],
[0.4179],
[0.5025]], device=’cuda:0’)
median shape torch.Size([16, 1])
Traceback (most recent call last):
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 676, in <module>
trainer.train_base(epochs= total_epochs, lr=learning_rate,use_refined_depth=True)
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 119, in train_base
self._train_base_epoch()
File “/home/workspace/feifeizhang/RGBDgaze/RGBD-Gaze-master/code/trainer_aaai.py”, line 419, in _train_base_epoch
left_eye_info[j, 2] = th.mean(a).item() * face_factor
RuntimeError: expand(torch.cuda.FloatTensor{[16, 1]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (2)

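找到原因啦！face_factor 是整个 batch 的张量（形状为 [B, 1]），而不是当前样本的标量，所以 th.median(cur_depth).item() * face_factor 的结果形状是 [B, 1]，无法赋值给标量位置 left_eye_info[j, 2]。应该取当前样本对应的 face_factor[j]。这也解释了为什么 batchsize 设置为 1 时问题消失（此时形状为 [1, 1]，只有一个元素）。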

程序结构

trainer_aaai.py

class GazeTrainer(Trainer):
#继承Trainer类
	
	self.weights_init(self.models.decoder)
    def train_base(self, epochs, lr=1e-4, use_refined_depth=False, fine_tune_headpose=True):
    def train_headpose(self, epochs, lr=2e-4, lambda_loss_mse=1):
        self.temps.headpose_logger = self.logger.getChild('train_headpose')
        self.temps.headpose_logger.info('preparing for headpose training loop.')
        self.temps.headpose_logger = self.logger.getChild('train_headpose')
        self.temps.headpose_logger.info('preparing for headpose training loop.')
        #没看明白此处log来自于哪里？
        # prepare logger
        # prepare dataloader 调用def _get_trainloader(self):
        # start training loop
        
    def resume(self, filename):
    def _prepare_model(self, model, train=True):
    def _get_trainloader(self):
        #data_transforms，
        #调用        transformed_train_dataset = GazePointAllDataset(root_dir=self.data_root,transform=data_transforms['train'],phase='train',face_image=True, face_depth=True, eye_image=True,eye_depth=True,info=True, eye_bbox=True, face_bbox=True, eye_coord=True)
        #参数有face_image、face_depth、face_bbox、eye_image、eye_depth、eye_bbox、info、eye_coord
        #调用	gaze_dataset.py   
	def _get_valloader(self):
        
	def _init_base_meters(self):
    def _init_headpose_meters(self):
        
	def _plot_base(self):
	def _plot_headpose(self):
        
    def _log_base(self):
    def _log_headpose(self):
    
    def _train_base_epoch(self):
    def _train_headpose_epoch(self):
        
    def _test_base(self):
    def _test_headpose(self):

    trainer = GazeTrainer(
        exp_name="gaze_aaai_refine_headpose"
    )

GazeTrainer 继承自父类 Trainer，父类定义在 GAZE/RGBD-Gaze-master/code/utils/trainer.py 中。

def __init__(self, checkpoint_dir='./', is_cuda=True):#初始化

GAZE/RGBD-Gaze-master/code/utils/edict.py

#对于字典的编辑

gaze_aaai.py

def resnet34(pretrained=False, **kwargs):
class Decoder(nn.Module):
    #左右眼decoder
class DepthBCE(nn.Module):
class RefineDepth(nn.Module):
    self.depth_block4 
	#faceblock1、2、3、4，depthblock1、2、3、4
    #每一层输出的向量维度一样，卷积，bn，relu 。in3，每一个block out 64、128、256、512
    self.down4 
    #4个down降低维度层数，in1024，out512，256，128，64，卷积、bn、relu，第一个down在relu之后加了resnet.
    self.head_pose
    #in512，out1024，128
    self.gen_block1

down层输入为1024，感觉是两个512的feature级联起来的。

def forward(self, face, depth):
    #face.shape BX3X224X224 depth.shape BX1X64X64
    face_f1 = self.face_block1(face)
    #face_f1.shape BX64X224X224
    face_f2 = self.face_block2(face_f1)
    #face_f2.shape BX128X112X112
    face_f3 = self.face_block3(face_f2)
    #face_f3.shape BX256X56X56
    face_f4 = self.face_block4(face_f3)
    #face_f4.shape BX512X28X28
    depth_f1 = self.depth_block1(depth)
    depth_f2 = self.depth_block2(depth_f1)
    depth_f3 = self.depth_block3(depth_f2)
    depth_f4 = self.depth_block4(depth_f3)
    
    #不同尺度的特征级联
    mixed_f4 = self.down1(th.cat([face_f4, depth_f4], dim=1))
    #mixed_f4.shape BX512X28X28
    mixed_f3 = self.down2(th.cat([face_f3, depth_f3], dim=1))
    #mixed_f3.shape BX256X56X56
    mixed_f2 = self.down3(th.cat([face_f2, depth_f2], dim=1))
    #mixed_f2.shape BX128X112X112
    mixed_f1 = self.down4(th.cat([face_f1, depth_f1], dim=1))
    #mixed_f1.shape BX64X224X224
    
    #对于小尺度的特征上采样，做特征金字塔
    gen_f3 = self.gen_block1(mixed_f4) + mixed_f3#zff: fpn
    #gen_f3.shape BX256X56X56
    gen_f2 = self.gen_block2(gen_f3) + mixed_f2
    #gen_f2.shape BX128X112X112
    gen_f1 = self.gen_block3(gen_f2) + mixed_f1
    #gen_f1.shape BX64X224X224
    gen_depth = self.gen_block4(gen_f1)
    #gen_depth.shape BX1X224X224
    head_pose = self.head_pose(mixed_f4)
    #head_pose.shape BX128X1x1
    return head_pose.view(head_pose.size(0), -1), gen_depth
#head_pose.shape(BX128)

GAZE/RGBD-Gaze-master/code/data/gaze_dataset.py

27英寸，长60厘米，宽34厘米（精确值：59.77厘米，33.62厘米）

在trainer_aaai.py调用时候

1
2

transformed_train_dataset = GazePointAllDataset(root_dir=self.data_root,transform=data_transforms['train'],phase='train',face_image=True, face_depth=True, eye_image=True,eye_depth=True,info=True, eye_bbox=True, face_bbox=True, eye_coord=True)
       #参数有face_image、face_depth、face_bbox、eye_image、eye_depth、eye_bbox、info、eye_coord

class GazePointAllDataset(data.Dataset):
    def __init__(self, root_dir, w_screen=59.77, h_screen=33.62, transform=None, phase="train", **kwargs):

程序错误：数据集train_csv没有left_eye_coord、right_eye_coord这两列，感觉是这个类调用有问题，或者是类的构造有问题，导致加载数据出错。

数据集结构

数据集由165231个RGB/depth图像对组成。使用159个参与者对应的图像(119,318个RGB/depth图像对)作为训练数据，使用其余59个参与者对应的数据(45,913个RGB/depth图像对)作为测试数据。

train_meta.csv包含了119,318个RGB/depth图像对，表头有12项，分别为

A.图片index。

B.face_images图片存的位置,位于color文件夹下。

C.face_depth存的位置，位于projected_depth_calibration。

D.face_bbox，存于txt文件中，为4个坐标值,位于color文件夹下。

E.left_eye_image,位于color文件夹下。

F.right_eye_image,位于color文件夹下。

G.left_eye_depth，位于projected_depth_calibration下。

H.right_eye_depth，位于projected_depth_calibration下。

I.left_eye_bbox,位于color文件夹下。

J.right_eye_bbox,位于color文件夹下。

K.gaze_point,位于coordinate文件夹下，文件名以.npy结尾，文件中储存的是坐标。

L.has_landmark,值为TRUE或者FALSE，大部分值都为TRUE。

color、projected_depth_calibration、coordinate文件夹均有219个文件夹对应219个志愿者，每个志愿者的文件夹下有多组实验的数据。

color文件夹下存放了219个志愿者的rgb相关信息，一组实验包含7个信息，全脸、左右眼的图片以及bbox，还有人脸的68点landmark。

projected_depth_calibration文件夹下存放了219个志愿者的depth相关信息，一组实验包含3个信息，分别为左右眼和全脸depth。

coordinate文件夹下存放了219个志愿者眼睛坐标系。

少了左眼（le）坐标和右眼（re）坐标。不知道这两个在网络中是怎么使用的？如果拿掉会怎么样？.mat格式的文件里是不是保存有？用python把一个.mat文件转换成excel看一下格式吧。

读取数据集遇到的问题

企图通过各个特征维度来弄清 le coord 和 re coord 是啥？/(ㄒoㄒ)/~~

resnet 34返回特征 512维度。（经过pooling，每个维度只有一个点）

def forward(self, face, depth):
    face_f1 = self.face_block1(face)#3->64卷积、bn、relu
    face_f2 = self.face_block2(face_f1)#64->128
    face_f3 = self.face_block3(face_f2)#128->256
    face_f4 = self.face_block4(face_f3)#256->512  28X28
    depth_f1 = self.depth_block1(depth)#1->64
    depth_f2 = self.depth_block2(depth_f1)#64->128
    depth_f3 = self.depth_block3(depth_f2)#128->256
    depth_f4 = self.depth_block4(depth_f3)#256->512 28X28
    mixed_f4 = self.down1(th.cat([face_f4, depth_f4], dim=1))#1024->512
    mixed_f3 = self.down2(th.cat([face_f3, depth_f3], dim=1))#512->256
    mixed_f2 = self.down3(th.cat([face_f2, depth_f2], dim=1))#256->128
    mixed_f1 = self.down4(th.cat([face_f1, depth_f1], dim=1))#128->64 224X224
    
    gen_f3 = self.gen_block1(mixed_f4) + mixed_f3#512->256反卷积 56X56
    gen_f2 = self.gen_block2(gen_f3) + mixed_f2#256->128 反卷积112X112
    gen_f1 = self.gen_block3(gen_f2) + mixed_f1#128->64  反卷积224X224
    gen_depth = self.gen_block4(gen_f1)#64->1 
    #和论文中的框图不一样，不过维度和 synthesize depth 对上了
    
    head_pose = self.head_pose(mixed_f4)#512,28X28->512,14X14->1024,7X7->1024,1X1->128,1X1
    return head_pose.view(head_pose.size(0), -1), gen_depth#6276. 1,224X224

似乎找到问题的答案啦！

#trainer_aaai.py
                for j, lb in enumerate(left_eye_bbox):
                    cur_depth = refined_depth[j, :, int(lb[1]):int(lb[3]), int(lb[0]):int(lb[2])]
                    left_eye_info[j, 2] = th.median(cur_depth).item() * face_factor
                for j, rb in enumerate(right_eye_bbox):
                    cur_depth = refined_depth[j, :, int(rb[1]):int(rb[3]), int(rb[0]):int(rb[2])]
                    right_eye_info[j, 2] = th.median(cur_depth).item() * face_factor
                    
 #gaze_aaai.py
l_coord = self.lcoord(th.cat([l_coord_feat, head_pose, linfo], 1))
        r_coord = self.rcoord(th.cat([r_coord_feat, head_pose, rinfo], 1))
    
    rinfo最终concatenate了三个信息：眼睛区域depth的中值（median），以及眼睛中心点的坐标xe、ye。

使用眼边缘六个点做平均获得眼睛中心位置。

屏幕分辨率1080（540），1960（980）。对于一个图片矩阵，（0,0）在左上，所以这里显示的landmark图片是upside down的。

处理

读出保存数据的csv，计算landmark眼周六点的平均值，获得双眼中心，保存至csv。对于landmark不存在的数据，从保存数据的csv中删除。

测试集没有删除数据，训练集合数据从119317删除至98629。

程序中对于gt处理，为什么有一个screen/2平移

w_screen=59.77, h_screen=33.62

gt[0] -= self.w_screen / 2

gt[1] -= self.h_screen / 2

为什么程序有一个减法？相当于平移。

color00008_face

color00009_face

上海科大的深度图的动态范围是啥？太黑了。

用了numpy和opencv读全都是黑乎乎的。

/home/workspace/feifeizhang/GAZE/RGBD-Gaze-master/SHtechSave/results/gaze_aaai_refine_headpose/val/depth

ep00iter0000_rf.png

看一下怎么保存的吧。动态范围

RGBD based gaze实验记录