mirror of
https://github.com/chen-gz/food_detection.git
synced 2025-04-20 06:55:23 +00:00
update
This commit is contained in:
commit
fa7961e00d
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/misc/*
|
||||
*.csv
|
215
extract2.py
Executable file
215
extract2.py
Executable file
@ -0,0 +1,215 @@
|
||||
import csv
|
||||
import os
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Camera sessions to process: (session directory, numbered sub-folders).
# The sub-folder numbers are listed explicitly because every session has gaps
# (presumably they are hours of the day -- confirm against the data layout).
_CAMERA_SESSIONS = [
    ('PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27',
     (7, 8, 9, 10, 11, 12, 13, 14, 15, 18)),
    ('PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27',
     (8, 9, 10, 11, 12, 13, 14, 15, 16, 17)),
    ('PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28',
     (8, 9, 10, 12, 13, 14, 15, 16, 17, 18)),
    ('PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28',
     (10, 13, 15, 16, 17, 18)),
    ('PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02',
     (7, 8, 9, 10, 11, 12, 13, 14, 15, 16)),
    # new data
    ('PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30',
     (9, 10, 11, 12, 13, 14, 15, 16, 17)),
    ('PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30',
     (8, 9, 10, 13, 14, 15, 16, 17)),
    ('PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03',
     (8, 9, 10, 11, 12, 13, 14, 15, 16)),
    ('PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05',
     (7, 8, 9, 10, 11, 12, 13, 14)),
]

# Flat list of '<session directory>/<number>' folders, in processing order.
folder_list = [
    '{}/{}'.format(session_dir, number)
    for session_dir, numbers in _CAMERA_SESSIONS
    for number in numbers
]
|
||||
|
||||
|
||||
from shutil import copyfile
|
||||
import time
|
||||
|
||||
def construct_vector(folder_path):
    """Build labelled tag rows for one camera folder and sort its images.

    Reads ``label.csv`` and ``clarify_result.csv`` from *folder_path* and
    matches every clarify row to the label rows that contain the base name
    of the clarify row's first cell.  For each match one sample is produced:

    * ``vector_time`` gets the first cell of the label row,
    * ``vector_x`` gets ``[label_row[1]] + clarify_row[1:]``,
    * ``vector_y`` gets 1 (food) when ``label_row[1]`` is listed in
      ``./food_rectify.csv`` (with ``.jpg`` appended to each listed name) or
      when ``int(label_row[2]) >= 3``; otherwise 0.

    Afterwards the current working directory is walked and every file whose
    name belongs to a food / non-food sample is copied to
    ``../food_detection_data/food`` or ``../food_detection_data/no_food``.

    Args:
        folder_path: directory holding ``label.csv`` and
            ``clarify_result.csv``.

    Returns:
        ``(vector_x, vector_y, vector_time)``.  Three empty lists are
        returned (after printing a message) when either input file is
        missing.
    """
    label_filepath = os.path.join(folder_path, "label.csv")
    clarify_result_filepath = os.path.join(folder_path, "clarify_result.csv")

    # Both files are required; bail out instead of crashing in open().
    missing = [path for path in (label_filepath, clarify_result_filepath)
               if not os.path.exists(path)]
    if missing:
        print('no label file and clarify result file:', ', '.join(missing))
        return [], [], []

    with open(label_filepath) as f:
        label_list = [[cell.strip() for cell in row]
                      for row in csv.reader(f, delimiter=',')]
    with open(clarify_result_filepath) as f:
        clarify_list = [[cell.strip() for cell in row]
                        for row in csv.reader(f, delimiter=',')]

    # Manually rectified images: always treated as food.
    with open("./food_rectify.csv") as f:
        food_rectify = {line.strip() + '.jpg' for line in f}

    vector_x, vector_y, vector_time = [], [], []
    food_names, no_food_names = set(), set()

    for clarify_row in clarify_list:
        if not clarify_row:
            continue  # blank line in the CSV
        image_name = os.path.basename(clarify_row[0])
        for label_row in label_list:
            if image_name not in label_row:
                continue
            file_name = label_row[1]
            print(file_name)
            # Label scores 3 and 4 are recognised as food.
            is_food = file_name in food_rectify or int(label_row[2]) >= 3
            vector_time.append(label_row[0])
            vector_x.append([file_name] + clarify_row[1:])
            vector_y.append(1 if is_food else 0)
            (food_names if is_food else no_food_names).add(file_name)

    if food_names or no_food_names:
        targets = (
            (food_names, '../food_detection_data/food'),
            (no_food_names, '../food_detection_data/no_food'),
        )
        for _, target_dir in targets:
            os.makedirs(target_dir, exist_ok=True)
        # NOTE: the whole working directory is searched, not just folder_path,
        # so same-named files from other folders are copied as well.
        for root, _dirs, files in os.walk('./', topdown=False):
            for name in files:
                for names, target_dir in targets:
                    if name in names:
                        src = os.path.join(root, name)
                        dst = os.path.join(target_dir, name)
                        copyfile(src, dst)
                        print(src)
                        print(dst)

    return vector_x, vector_y, vector_time
|
||||
|
||||
|
||||
def construct_food_no_food(folder_name):
    """Append the samples of one folder to ``food.csv`` / ``no_food.csv``.

    Calls ``construct_vector(folder_name)`` and writes every sample as one
    comma-joined line: samples labelled 1 go to ``food.csv``, all others to
    ``no_food.csv``.  Both files are opened in append mode, so repeated runs
    accumulate rows.

    Args:
        folder_name: folder passed through to ``construct_vector``.
    """
    vector_x, vector_y, _ = construct_vector(folder_name)
    # ``with`` guarantees both files are closed even if a write fails.
    with open('food.csv', 'a') as food_file, \
            open('no_food.csv', 'a') as no_food_file:
        for row, label in zip(vector_x, vector_y):
            target = food_file if label == 1 else no_food_file
            target.write(','.join(row))
            target.write('\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Folder counts per group; their sum is how many leading entries of
    # folder_list get processed.
    section = [10, 10, 10, 6, 10, 9, 8, 9, 8]
    folder_count = sum(section)
    for index in range(folder_count):
        construct_food_no_food(folder_list[index])
|
31
get_FN_FP.py
Executable file
31
get_FN_FP.py
Executable file
@ -0,0 +1,31 @@
|
||||
import shutil
|
||||
import os
|
||||
|
||||
# Unique file names listed (one per line) as false positives / negatives.
with open('FP.txt') as fp_listing:
    FP = list({row.strip() for row in fp_listing})
with open('FN.txt') as fn_listing:
    FN = list({row.strip() for row in fn_listing})

# Sets give O(1) membership tests inside the directory walk.
_fp_names, _fn_names = set(FP), set(FN)

# Make sure the destination folders exist before copying into them.
os.makedirs('./FP/', exist_ok=True)
os.makedirs('./FN/', exist_ok=True)

# Copy every listed file found below the current directory into ./FP or
# ./FN; a file already present at the destination is left untouched.
for root, dirs, files in os.walk('.'):
    for name in files:
        src = os.path.join(root, name)
        if name in _fp_names:
            dst = os.path.join('./FP/', name)
            if not os.path.isfile(dst):
                shutil.copyfile(src, dst)
        if name in _fn_names:
            dst = os.path.join('./FN/', name)
            if not os.path.isfile(dst):
                shutil.copyfile(src, dst)
|
113
get_dataset.ipynb
Normal file
113
get_dataset.ipynb
Normal file
@ -0,0 +1,113 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import csv\n",
|
||||
"from sklearn.utils import shuffle\n",
|
||||
"food_raw_data, non_food_raw_data, food_rectify = [], [], []\n",
|
||||
"food_data, non_food_data = [], []\n",
|
||||
"\n",
|
||||
"with open(\"food_rectify.csv\") as f:\n",
|
||||
" reader = csv.reader(f)\n",
|
||||
" for row in reader:\n",
|
||||
" food_rectify.append(row)\n",
|
||||
"\n",
|
||||
"with open(\"food.csv\") as f:\n",
|
||||
" reader = csv.reader(f)\n",
|
||||
" for row in reader:\n",
|
||||
" food_raw_data.append(row)\n",
|
||||
" \n",
|
||||
"with open(\"no_food.csv\") as f:\n",
|
||||
" reader = csv.reader(f)\n",
|
||||
" for row in reader:\n",
|
||||
" non_food_raw_data.append(row)\n",
|
||||
" \n",
|
||||
"food_data = food_raw_data\n",
|
||||
"\n",
|
||||
"for i in non_food_raw_data:\n",
|
||||
" if i[0] not in food_rectify:\n",
|
||||
" non_food_data.append(i)\n",
|
||||
" else:\n",
|
||||
" food_data.append(i)\n",
|
||||
"\n",
|
||||
"food_data = shuffle(food_data)\n",
|
||||
"non_food_data = shuffle(non_food_data)\n",
|
||||
"\n",
|
||||
"ratio = 0.75 \n",
|
||||
"train_food_len = int(len(food_data) * ratio)\n",
|
||||
"train_non_food_len = train_food_len\n",
|
||||
"\n",
|
||||
"test_food_len = len(food_data) - train_food_len\n",
|
||||
"test_non_food_len = int(len(non_food_data) * (1 - ratio))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_food = food_data[0:train_food_len]\n",
|
||||
"test_food = food_data[train_food_len:train_food_len + test_food_len]\n",
|
||||
"\n",
|
||||
"train_non_food = non_food_data[0:train_non_food_len]\n",
|
||||
"test_non_food = non_food_data[train_non_food_len:train_non_food_len + test_non_food_len]\n",
|
||||
"\n",
|
||||
"with open('train_food.csv', 'w') as f:\n",
|
||||
" write = csv.writer(f)\n",
|
||||
" write.writerows(train_food)\n",
|
||||
" \n",
|
||||
"with open('train_non_food.csv', 'w') as f:\n",
|
||||
" write = csv.writer(f)\n",
|
||||
" write.writerows(train_non_food )\n",
|
||||
"\n",
|
||||
"with open('test_food.csv', 'w') as f:\n",
|
||||
" write = csv.writer(f)\n",
|
||||
" write.writerows(test_food )\n",
|
||||
"with open('test_non_food.csv', 'w') as f:\n",
|
||||
" write = csv.writer(f)\n",
|
||||
" write.writerows(test_non_food)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = [i for i in range(10)]\n",
|
||||
"print(a)\n",
|
||||
"print(a[0:4])\n",
|
||||
"print(a[4:7])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"non_food_raw_data"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
728
svm3_nb.ipynb
Normal file
728
svm3_nb.ipynb
Normal file
File diff suppressed because one or more lines are too long
351
svm_nb_funcs.py
Normal file
351
svm_nb_funcs.py
Normal file
@ -0,0 +1,351 @@
|
||||
from sklearn import svm
|
||||
import csv
|
||||
from sklearn.utils import shuffle
|
||||
|
||||
from sklearn.metrics import classification_report
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# CSV files read by get_raw_data().
food_file_path = 'food.csv'
non_food_file_path = 'no_food.csv'

# Every row loaded from the two CSV files.
food_raw_data = []
non_food_raw_data = []

# Raw rows after the train/test split.
train_raw_food = []
train_raw_non_food = []
test_raw_food = []
test_raw_non_food = []

# Feature vectors / labels built by construct_train_test_set().
train_vector_x = []
train_vector_y = []
train_vector_num = []
test_vector_x = []
test_vector_y = []

# Tags selected by get_use_tag() and their column index in a feature vector.
useful_tag_list = []
useful_dict = {}

# Per-tag weight filled in by get_correlation().
correlation_dict = {}

# Share of food samples in the training split; set by init().
p_food = 0
|
||||
|
||||
|
||||
def get_raw_data():
    """Load all rows of the food and non-food CSV files into the globals.

    ``food_raw_data`` and ``non_food_raw_data`` are replaced by the rows read
    from ``food_file_path`` and ``non_food_file_path``.  Replacing (rather
    than appending to) the lists keeps repeated calls from duplicating
    every sample.
    """
    global food_raw_data, non_food_raw_data

    # newline='' is what the csv module expects for files it parses.
    with open(food_file_path, newline='') as f:
        food_raw_data = list(csv.reader(f))
    with open(non_food_file_path, newline='') as f:
        non_food_raw_data = list(csv.reader(f))
|
||||
|
||||
|
||||
def shuffle_raw_data():
    """Shuffle both raw data sets and cap the non-food set.

    After shuffling, ``non_food_raw_data`` is cut down to at most as many
    rows as ``food_raw_data`` holds.
    """
    global food_raw_data, non_food_raw_data
    shuffled_food = shuffle(food_raw_data)
    shuffled_non_food = shuffle(non_food_raw_data)
    food_raw_data = shuffled_food
    non_food_raw_data = shuffled_non_food[:len(shuffled_food)]
|
||||
|
||||
|
||||
|
||||
def div_train_test_raw_data(ratio=0.75):
    """Split the raw food / non-food rows into train and test parts.

    The first ``int(len(data) * ratio)`` rows of each set become training
    data and the remaining rows become test data.

    Args:
        ratio: fraction of each set assigned to the training split.
    """
    global train_raw_food, train_raw_non_food, \
        test_raw_food, test_raw_non_food

    food_cut = int(len(food_raw_data) * ratio)
    non_food_cut = int(len(non_food_raw_data) * ratio)

    train_raw_food, test_raw_food = (
        food_raw_data[:food_cut], food_raw_data[food_cut:])
    train_raw_non_food, test_raw_non_food = (
        non_food_raw_data[:non_food_cut], non_food_raw_data[non_food_cut:])
|
||||
|
||||
def save_raw_data_train_test():
    """Write the four train/test splits to CSV files and print a count.

    Creates ``train_food.csv``, ``train_non_food.csv``, ``test_food.csv`` and
    ``test_non_food.csv`` in the current directory, then prints the number
    of training food rows.

    The split lists themselves are left unchanged.  The earlier version
    ended with loops of the form ``for i in rows: i = i[1:]``, which only
    rebound the loop variable and never modified the rows; they are removed
    here so the code says what it actually does.
    """
    outputs = (
        ('train_food.csv', train_raw_food),
        ('train_non_food.csv', train_raw_non_food),
        ('test_food.csv', test_raw_food),
        ('test_non_food.csv', test_raw_non_food),
    )
    for file_name, rows in outputs:
        # newline='' stops the csv writer's '\r\n' from being translated
        # again on platforms that rewrite line endings.
        with open(file_name, 'w', newline='') as f:
            csv.writer(f).writerows(rows)

    print(len(train_raw_food))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def count_dict(raw_data, threshold=0.5):
    """Count how often each tag occurs with a score above *threshold*.

    Every row is read as consecutive ``(tag, score)`` pairs starting at
    index 0; a trailing cell without a partner is ignored.  Tags are
    whitespace-stripped before counting.

    Args:
        raw_data: iterable of rows (lists of cells).
        threshold: a pair is counted only if ``float(score) > threshold``.

    Returns:
        dict mapping tag -> number of counted occurrences.
    """
    tag_counts = {}
    for row in raw_data:
        for tag, score in zip(row[0::2], row[1::2]):
            if float(score) > threshold:
                key = str(tag).strip()
                tag_counts[key] = tag_counts.get(key, 0) + 1
    return tag_counts
|
||||
|
||||
|
||||
def get_use_tag(use_all=False, threshold=0.5):
    """Select the most frequent tags and assign each a vector index.

    Tag counts come from ``count_dict`` on the training food rows; with
    *use_all* the counts of the training non-food rows are added in.  The
    counts are sorted in descending order and summed until the running total
    exceeds ``int(total * threshold)``; the count reached at that point is
    the bound.  Tags whose count is strictly greater than the bound are
    stored in ``useful_tag_list`` and ``useful_dict`` maps each of them to
    its position in that list.

    Args:
        use_all: also include counts from the non-food training rows.
        threshold: fraction of all tag occurrences used to find the bound.
    """
    global useful_tag_list, useful_dict
    useful_tag_list, useful_dict = [], {}

    tag_counts = count_dict(train_raw_food)
    non_food_counts = count_dict(train_raw_non_food)
    if use_all:
        for tag, count in non_food_counts.items():
            tag_counts[tag] = tag_counts.get(tag, 0) + count

    ranked = sorted(tag_counts.values(), reverse=True)
    useful_bound = int(sum(ranked) * threshold)

    bound = 0
    running_total = 0
    for count in ranked:
        running_total += count
        if running_total > useful_bound:
            bound = count
            break

    useful_tag_list = [tag for tag, count in tag_counts.items()
                       if count > bound]
    useful_dict = {tag: position
                   for position, tag in enumerate(useful_tag_list)}
|
||||
|
||||
|
||||
def get_correlation():
    """Recompute ``correlation_dict`` from the training food rows.

    Each tag counted by ``count_dict(train_raw_food)`` is mapped to
    ``count / len(food_raw_data)``.  The dictionary is emptied first so a
    repeated call does not keep stale weights from an earlier split (the
    earlier version left old entries in place and only printed an error).

    NOTE(review): the denominator is the size of the whole food set, not of
    the training split -- confirm this is intended.
    """
    food_tag_dict = count_dict(train_raw_food)
    total_food = len(food_raw_data)

    # Update in place so existing references to the dict stay valid.
    correlation_dict.clear()
    correlation_dict.update(
        {tag: count / total_food for tag, count in food_tag_dict.items()})
|
||||
|
||||
|
||||
def _rows_to_vectors(rows):
    """Convert raw tag rows into feature vectors, one slot per useful tag."""
    width = len(useful_tag_list)
    vectors = []
    for row in rows:
        vector = [0] * width
        # Rows are read as consecutive (tag, score) pairs from index 0.
        for j in range(0, len(row) - 1, 2):
            # Strip like count_dict does, so padded tags still match.
            tag = str(row[j]).strip()
            slot = useful_dict.get(tag)
            if slot is None:
                # TODO: should be changed to random probability
                continue
            score = float(row[j + 1])
            # A useful tag never counted in food rows has no entry in
            # correlation_dict; its food frequency is 0.
            weight = correlation_dict.get(tag, 0.0)
            vector[slot] = score * weight + p_food * (1 - score)
        vectors.append(vector)
    return vectors


def construct_train_test_set():
    """Build the train/test feature vectors and labels from the raw splits.

    For every row a vector with one slot per entry of ``useful_tag_list`` is
    created; the slot of each useful tag holds
    ``score * correlation_dict[tag] + p_food * (1 - score)``, all other slots
    stay 0.  Food rows come first, followed by non-food rows.

    Sets the globals ``train_vector_x``, ``train_vector_y`` ("food" /
    "no food"), ``train_vector_num`` (1 / -1), ``test_vector_x`` and
    ``test_vector_y``.
    """
    global train_vector_x, train_vector_y, train_vector_num, \
        test_vector_x, test_vector_y

    train_food = _rows_to_vectors(train_raw_food)
    train_non_food = _rows_to_vectors(train_raw_non_food)
    train_vector_x = train_food + train_non_food
    train_vector_y = (["food"] * len(train_food)
                      + ["no food"] * len(train_non_food))
    train_vector_num = [1] * len(train_food) + [-1] * len(train_non_food)

    test_food = _rows_to_vectors(test_raw_food)
    test_non_food = _rows_to_vectors(test_raw_non_food)
    test_vector_x = test_food + test_non_food
    test_vector_y = (["food"] * len(test_food)
                     + ["no food"] * len(test_non_food))
|
||||
|
||||
|
||||
def confision_matrix(ground_true, predict, print_result=False):
    """Compute the true-positive and false-positive rate of a prediction.

    Labels are the strings "food" (positive) and "no food" (negative);
    pairs containing any other label are ignored.

    Args:
        ground_true: sequence of true labels.
        predict: sequence of predicted labels, indexed like *ground_true*.
        print_result: also print the raw TP / FN / TN / FP counts.

    Returns:
        ``(TPR, FPR)`` where ``TPR = TP / (TP + FN)`` and
        ``FPR = FP / (FP + TN)``.  A rate whose denominator is zero (no
        positive or no negative ground-truth samples) is reported as 0.0
        instead of raising ZeroDivisionError.
    """
    TP, FP, FN, TN = 0, 0, 0, 0
    for i, truth in enumerate(ground_true):
        guess = predict[i]
        if truth == "food":
            if guess == "food":
                TP += 1
            elif guess == "no food":
                FN += 1
        elif truth == "no food":
            if guess == "food":
                FP += 1
            elif guess == "no food":
                TN += 1

    TPR = TP / (TP + FN) if TP + FN else 0.0
    FPR = FP / (FP + TN) if FP + TN else 0.0
    if print_result:
        print("TP: ", TP, "FN: ", FN, "TN: ", TN, "FP :", FP)
    return TPR, FPR
|
||||
|
||||
|
||||
def _has_food_tag(row, ratio):
    """Return True if *row* has a "food" cell followed by a score > *ratio*."""
    # Stop one short of the end so a trailing "food" cell with no score
    # after it cannot raise IndexError.
    return any(
        row[j] == "food" and float(row[j + 1]) > ratio
        for j in range(len(row) - 1)
    )


def clarifai_result():
    """Evaluate the raw "food" tag score as a classifier on the test split.

    For the ten thresholds 0.0, 0.1, ..., 0.9 a test row is predicted as
    food when it contains a "food" tag whose score exceeds the threshold.
    The resulting operating points for thresholds 0.6-0.9 are drawn as green
    dots on the current matplotlib figure.

    Returns:
        ``(TPR_list, FPR_list)`` with one entry per threshold.  A rate is
        0.0 when the corresponding test split is empty.
    """
    TPR_list = []
    FPR_list = []
    food_total = len(test_raw_food)
    non_food_total = len(test_raw_non_food)
    for k in range(10):
        ratio = k / 10
        TP = sum(1 for row in test_raw_food if _has_food_tag(row, ratio))
        FP = sum(1 for row in test_raw_non_food if _has_food_tag(row, ratio))

        TPR = TP / food_total if food_total else 0.0
        FPR = FP / non_food_total if non_food_total else 0.0
        TPR_list.append(TPR)
        FPR_list.append(FPR)
        if k in (6, 7, 8, 9):
            plt.scatter([FPR], [TPR], marker='o', c='green')
    return TPR_list, FPR_list
|
||||
#
|
||||
# print("clarify result**********")
|
||||
# print("TP: ", TP, "FN: ", FN, "TN: ", TN, "FP :", FP)
|
||||
# print("Sensitivity = ", TP / (TP + FN), end=" ")
|
||||
# print("Specificity = ", TN / (TN + FP))
|
||||
# print("Precision = ", TP / (TP + FP), end=" ")
|
||||
# print("Accuracy = ", (TP + TN) / (TP + TN + FN + FP))
|
||||
# print("burden = ", (TP + FP) / (TP +TN+FN+FP))
|
||||
# plt.scatter([1 - 0.789866667, 1 - 0.684, 1 - 0.55786, 1-0.4512], [0.584493042, 0.666003976, 0.753479125, 0.833664679], marker='o', c='green')
|
||||
|
||||
|
||||
def get_p_food_before_balance():
    """Placeholder that does nothing and returns ``None``.

    Earlier drafts, which survived only as commented-out code, computed
    ``p_food`` either from row counts of "#food_ori.csv" and
    "#no food_ori.csv" or from the sizes of the training splits.
    """
    return None
|
||||
|
||||
|
||||
def init(use_all=True):
    """Run the whole data-preparation pipeline.

    Loads and shuffles the raw data, splits it 75/25, saves the splits,
    runs ``clarifai_result``, sets ``p_food`` to the share of food rows in
    the training split, and finally builds the tag weights, the useful-tag
    selection and the feature vectors.

    Args:
        use_all: forwarded to ``get_use_tag``.
    """
    global p_food

    get_raw_data()
    shuffle_raw_data()
    div_train_test_raw_data(0.75)
    save_raw_data_train_test()
    clarifai_result()

    food_count = len(train_raw_food)
    non_food_count = len(train_raw_non_food)
    p_food = food_count / (food_count + non_food_count)

    get_correlation()
    get_use_tag(use_all)
    construct_train_test_set()
|
||||
|
Loading…
x
Reference in New Issue
Block a user