commit fa7961e00d7d29e1e3dad9a530eef9930e1e2ad3 Author: Guangzong Date: Thu Feb 11 13:34:23 2021 -0500 update diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..776f8d3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/misc/* +*.csv diff --git a/extract2.py b/extract2.py new file mode 100755 index 0000000..0fa8285 --- /dev/null +++ b/extract2.py @@ -0,0 +1,215 @@ +import csv +import os +import numpy as np +import matplotlib.pyplot as plt + +folder_list = [ + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/7', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/8', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/9', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/10', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/11', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/12', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/13', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/14', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/15', + 'PHASE3_HH01_T2_EButtom-402/eButton_Data/Camera/ID0402_Nov.27/18', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/8', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/9', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/10', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/11', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/12', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/13', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/14', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/15', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/16', + 'PHASE3_HH01_T2_EButtom_411-Mother/eButton_Data/Camera/ID0411_Nov.27/17', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/8', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/9', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/10', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/12', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/13', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/14', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/15', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/16', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/17', + 'PHASE3_HH02_T2_eButton-402_Mother/eButton_Data/Camera/ID0402_Nov.28/18', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/10', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/13', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/15', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/16', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/17', + 'PHASE3_HH02_T2_eButton-411_Adolescent_child/eButton_Data/Camera/ID0411_Nov.28/18', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/7', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/8', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/9', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/10', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/11', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/12', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/13', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/14', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/15', + 'PHASE3_HH02_T4-eButton-411_Mother/eButton_Data/Camera/ID0411_Dec.02/16', + + # new data + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/9', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/10', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/11', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/12', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/13', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/14', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/15', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/16', + 'PHASE3_HH03_eButton-402_Father/eButton_Data/Camera/ID0402_Nov.30/17', + + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/8', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/9', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/10', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/13', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/14', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/15', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/16', + 'PHASE3_HH03_eButton-411_Mother/eButton_Data/Camera/ID0411_Nov.30/17', + + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/8', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/9', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/10', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/11', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/12', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/13', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/14', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/15', + 'PHASE3_HH03_T4_eBUTTON_402-ADOLESCENT_BOY/eButton_Data/Camera/ID0402_Dec.03/16', + + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/7', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/8', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/9', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/10', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/11', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/12', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/13', + 'PHASE3_HH05_eButton-402_Father/eButton_Data/Camera/ID0402_Dec.05/14', + +] + + +from shutil import copyfile +import time + +def construct_vector(folder_path): + label_file_name = "label.csv" + clarify_result_name = "clarify_result.csv" + label_filepath = os.path.join(folder_path, label_file_name) + clarify_result_filepath = os.path.join(folder_path, clarify_result_name) + if not os.path.exists(label_filepath) and not os.path.exists( + clarify_result_filepath): + print('no label file and clarify result file') + vector_x, vector_y, vector_time = [], [], [] + label_list = [] + clarify_list = [] + with open(label_filepath) as f: + label_reader = csv.reader(f, delimiter=',') + for row in label_reader: + label_list.append(row) + with open(clarify_result_filepath) as f: + clarify_reader = csv.reader(f, delimiter=',') + for row in clarify_reader: + clarify_list.append(row) + for i in range(len(label_list)): + for j in range(len(label_list[i])): + label_list[i][j] = label_list[i][j].strip() + for i in range(len(clarify_list)): + for j in range(len(clarify_list[i])): + clarify_list[i][j] = clarify_list[i][j].strip() + food_name_list = [] + no_food_name_list = [] + + food_rectify = [] + with open("./food_rectify.csv") as f: + for line in f: + food_rectify.append(line.strip()+'.jpg') + + for i in clarify_list: + for j in label_list: + if os.path.basename(i[0]) in j: + vector_time.append(j[0]) + print(j[1]) + tmp_with_name = [j[1]] + tmp_with_name += i[1:] + if tmp_with_name[0] in food_rectify: + vector_y.append(1) + # vector_x.append(i[1:]) + vector_x.append(tmp_with_name) + food_name_list.append(j[1]) + else: + if int(j[2]) >= 3: # 3 and 4 recognized as food + vector_y.append(1) + # vector_x.append(i[1:]) + vector_x.append(tmp_with_name) + food_name_list.append(j[1]) + else: + vector_y.append(0) + # vector_x.append(i[1:]) + vector_x.append(tmp_with_name) + no_food_name_list.append(j[1]) + + t = time.time() + for root, dirs, files in os.walk('./', topdown=False): + for name in files: + if name in food_name_list: + src = os.path.join(root,name) + dst = os.path.join('../food_detection_data/food', name) + # if os.path.isfile(dst): + # dst = os.path.join('./food/' , str(int(t)) + name) + copyfile(src,dst) + + print(src) + print(dst) + if name in no_food_name_list: + src = os.path.join(root,name) + dst = os.path.join('../food_detection_data/no_food', name) + # if os.path.isfile(dst): + # dst = os.path.join('./no_food/' ,str(int(t)) + name) + print(src) + print(dst) + copyfile(src,dst) + + return vector_x, vector_y, vector_time + + +def construct_food_no_food(folder_name): + vector_x, vector_y, _ = construct_vector(folder_name) + # print(_) + food_csv = 'food.csv' + no_food_csv = 'no_food.csv' + food_file = open(food_csv, 'a') + no_food_file = open(no_food_csv, 'a') + for i in range(len(vector_y)): + if vector_y[i] == 1: + food_file.write(','.join(vector_x[i])) + food_file.write('\n') + else: + no_food_file.write(','.join(vector_x[i])) + no_food_file.write('\n') + food_file.close() + no_food_file.close() + + +if __name__ == '__main__': + section = [10, 10, 10, 6, 10, 9, 8, 9, 8] + # for i in range(10): + # vector_x, vector_y, _ = construct_vector(folder_list[i]) + # tmp_vector_y = [str(i) for i in vector_y] + # print(' '.join((tmp_vector_y))) + # plt.scatter(range(len(vector_y)), vector_y, s=0.5) + # plt.show() + # for i in range(0, 5): + # vector_x, vector_y, vector_time = construct_vector(folder_list[i]) + # vector_y = list(map(str, vector_y)) + # print(' '.join(vector_y)) + + # second = [i for i in range(40)] + [i for i in range(36,46)] + # print(second) + # for i in second: + for i in range(0, sum(section)): + construct_food_no_food(folder_list[i]) diff --git a/get_FN_FP.py b/get_FN_FP.py new file mode 100755 index 0000000..33c953b --- /dev/null +++ b/get_FN_FP.py @@ -0,0 +1,31 @@ +import shutil +import os + +FP, FN = [], [] +with open('FP.txt') as f: + for row in f: + FP.append(row.strip()) +with open('FN.txt') as f: + for row in f: + FN.append(row.strip()) +FP = list(set(FP)) +FN = list(set(FN)) + + +for root, dirs, files in os.walk('.'): + for f in files: + if f in FP: + src = os.path.join(root, f) + dst = './FP/' + f + if not os.path.isfile(dst): + shutil.copyfile(src, dst) + + if f in FN: + src = os.path.join(root, f) + dst = os.path.join('./FN/', f) + if not os.path.isfile(dst): + shutil.copyfile(src, dst) + + +# print(os.path.join(root, name)) +# shutil.copyfile(src, dst, *, follow_symlinks=True) diff --git a/get_dataset.ipynb b/get_dataset.ipynb new file mode 100644 index 0000000..6597d37 --- /dev/null +++ b/get_dataset.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "from sklearn.utils import shuffle\n", + "food_raw_data, non_food_raw_data, food_rectify = [], [], []\n", + "food_data, non_food_data = [], []\n", + "\n", + "with open(\"food_rectify.csv\") as f:\n", + " reader = csv.reader(f)\n", + " for row in reader:\n", + " food_rectify.append(row)\n", + "\n", + "with open(\"food.csv\") as f:\n", + " reader = csv.reader(f)\n", + " for row in reader:\n", + " food_raw_data.append(row)\n", + " \n", + "with open(\"no_food.csv\") as f:\n", + " reader = csv.reader(f)\n", + " for row in reader:\n", + " non_food_raw_data.append(row)\n", + " \n", + "food_data = food_raw_data\n", + "\n", + "for i in non_food_raw_data:\n", + " if i[0] not in food_rectify:\n", + " non_food_data.append(i)\n", + " else:\n", + " food_data.append(i)\n", + "\n", + "food_data = shuffle(food_data)\n", + "non_food_data = shuffle(non_food_data)\n", + "\n", + "ratio = 0.75 \n", + "train_food_len = int(len(food_data) * ratio)\n", + "train_non_food_len = train_food_len\n", + "\n", + "test_food_len = len(food_data) - train_food_len\n", + "test_non_food_len = int(len(non_food_data) * (1 - ratio))\n", + "\n", + "\n", + "train_food = food_data[0:train_food_len]\n", + "test_food = food_data[train_food_len:train_food_len + test_food_len]\n", + "\n", + "train_non_food = non_food_data[0:train_non_food_len]\n", + "test_non_food = non_food_data[train_non_food_len:train_non_food_len + test_non_food_len]\n", + "\n", + "with open('train_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(train_food)\n", + " \n", + "with open('train_non_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(train_non_food )\n", + "\n", + "with open('test_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(test_food )\n", + "with open('test_non_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(test_non_food)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = [i for i in range(10)]\n", + "print(a)\n", + "print(a[0:4])\n", + "print(a[4:7])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "non_food_raw_data" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/svm3_nb.ipynb b/svm3_nb.ipynb new file mode 100644 index 0000000..1885e45 --- /dev/null +++ b/svm3_nb.ipynb @@ -0,0 +1,728 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import svm\n", + "import csv\n", + "from sklearn.utils import shuffle\n", + "\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.linear_model import LogisticRegression\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "food_file_path, non_food_file_path = 'food.csv', 'no_food.csv'\n", + "# food_file_path2, non_food_file_path2 = 'food2.csv', 'no_food2.csv'\n", + "food_raw_data, non_food_raw_data = [], []\n", + "# food_raw_data2, non_food_raw_data2 = [], []\n", + "\n", + "train_raw_food, train_raw_non_food = [], []\n", + "test_raw_food, test_raw_non_food = [], []\n", + "\n", + "train_vector_x, train_vector_y, train_vector_num = [], [], []\n", + "test_vector_x, test_vector_y = [], []\n", + "\n", + "useful_tag_list, useful_dict = [], {}\n", + "\n", + "correlation_dict = {}\n", + "\n", + "p_food = 0\n", + "\n", + "\n", + "def get_raw_data():\n", + " global food_raw_data, non_food_raw_data\n", + " \n", + " with open(food_file_path) as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " food_raw_data.append(row)\n", + " \n", + " with open(non_food_file_path) as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " non_food_raw_data.append(row)\n", + "\n", + "\n", + "\n", + "def get_raw_data_seperate(test_total = False):\n", + " global train_raw_food,train_raw_non_food,\\\n", + " test_raw_food, test_raw_non_food\n", + "\n", + " with open(\"train_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " train_raw_food.append(row)\n", + " with open(\"train_non_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " train_raw_non_food.append(row)\n", + " with open(\"test_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " test_raw_food.append(row)\n", + " if test_total is True:\n", + " with open(\"test_non_food_total.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " test_raw_non_food.append(row)\n", + " else:\n", + " with open(\"test_non_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " test_raw_non_food.append(row)\n", + "\n", + "\n", + " for i in range(len(train_raw_food)):\n", + " train_raw_food[i] = train_raw_food[i][1:]\n", + " for i in range(len(train_raw_non_food)):\n", + " train_raw_non_food[i] = train_raw_non_food[i][1:]\n", + " for i in range(len(test_raw_food)):\n", + " test_raw_food[i] = test_raw_food[i][1:]\n", + " for i in range(len(test_raw_non_food)):\n", + " test_raw_non_food[i] = test_raw_non_food[i][1:]\n", + " # with open(food_file_path2) as f:\n", + " # csv_reader = csv.reader(f)\n", + " # for row in csv_reader:\n", + " # food_raw_data2.append(row)\n", + " # with open(non_food_file_path2) as f:\n", + " # csv_reader = csv.reader(f)\n", + " # for row in csv_reader:\n", + " # non_food_raw_data2.append(row)\n", + "\n", + "\n", + "def shuffle_raw_data():\n", + " global food_raw_data, non_food_raw_data\n", + " # non_food_raw_data = non_food_raw_data[:15000]\n", + " # non_food_raw_data = non_food_raw_data[:len(food_raw_data)]\n", + " food_raw_data = shuffle(food_raw_data)\n", + " non_food_raw_data = shuffle(non_food_raw_data)\n", + " # non_food_raw_data = non_food_raw_data[:15000]\n", + " non_food_raw_data = non_food_raw_data[:len(food_raw_data)]\n", + " # non_food_raw_data = non_food_raw_data[00000]\n", + "\n", + " \n", + "\n", + "def div_train_test_raw_data(ratio=0.75):\n", + " global food_raw_data, non_food_raw_data, train_raw_food, \\\n", + " train_raw_non_food, test_raw_food, test_raw_non_food, \\\n", + " food_raw_data2, non_food_raw_data2\n", + " # remove some non_food_raw_data\n", + " # non_food_raw_data = non_food_raw_data[:10000]\n", + " train_food_len = int(len(food_raw_data) * ratio)\n", + " train_non_food_len = int(len(non_food_raw_data) * ratio)\n", + " train_raw_food = food_raw_data[0:train_food_len]\n", + " train_raw_non_food = non_food_raw_data[0:train_non_food_len]\n", + " test_raw_food = food_raw_data[train_food_len:]\n", + " test_raw_non_food = non_food_raw_data[train_non_food_len:]\n", + "\n", + " # train_raw_food = food_raw_data\n", + " # test_raw_food = food_raw_data2\n", + "\n", + " # train_raw_non_food = non_food_raw_data\n", + " # test_raw_non_food = non_food_raw_data2\n", + "\n", + "def save_raw_data_train_test():\n", + " global food_raw_data, non_food_raw_data, train_raw_food, \\\n", + " train_raw_non_food, test_raw_food, test_raw_non_food\n", + " with open('train_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(train_raw_food)\n", + " with open('train_non_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(train_raw_non_food )\n", + "\n", + " with open('test_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(test_raw_food )\n", + " with open('test_non_food.csv', 'w') as f:\n", + " write = csv.writer(f)\n", + " write.writerows(test_raw_non_food)\n", + "\n", + " for i in range(len(train_raw_food)):\n", + " train_raw_food[i] = train_raw_food[i][1:]\n", + " for i in range(len(train_raw_non_food)):\n", + " train_raw_non_food[i] = train_raw_non_food[i][1:]\n", + " for i in range(len(test_raw_food)):\n", + " test_raw_food[i] = test_raw_food[i][1:]\n", + " for i in range(len(test_raw_non_food)):\n", + " test_raw_non_food[i] = test_raw_non_food[i][1:]\n", + "\n", + " \n", + "\n", + "\n", + "def count_dict(raw_data, threshold=0.5):\n", + " counter_dict = {} # only collect from train data\n", + " for i in raw_data:\n", + " for j in range(0, len(i) - 1, 2):\n", + " tmp = str(i[j]).strip()\n", + " if float(i[j + 1]) > threshold:\n", + " if tmp not in counter_dict:\n", + " counter_dict[tmp] = 1\n", + " else:\n", + " counter_dict[tmp] += 1\n", + " else:\n", + " # if tmp not in counter_dict:\n", + " # counter_dict[tmp] = 0\n", + " pass\n", + " return counter_dict\n", + "\n", + "\n", + "def get_use_tag(use_all=False, threshold=0.5):\n", + " global useful_tag_list, food_raw_data, non_food_raw_data, useful_dict\n", + " useful_tag_list, useful_dict = [], {}\n", + " food_tag_dict = count_dict(train_raw_food)\n", + " non_food_tag_dict = count_dict(train_raw_non_food)\n", + "\n", + " if use_all:\n", + " for i in non_food_tag_dict.keys():\n", + " if i not in food_tag_dict.keys():\n", + " food_tag_dict[i] = non_food_tag_dict[i]\n", + " else:\n", + " food_tag_dict[i] += non_food_tag_dict[i]\n", + " # food_tag_dict.update(non_food_tag_dict)\n", + "\n", + " appear_times = 0\n", + " appear_list = []\n", + " for i in food_tag_dict.keys():\n", + " appear_times += food_tag_dict[i]\n", + " appear_list.append(food_tag_dict[i])\n", + " appear_list.sort(reverse=True)\n", + " useful_bound = int(appear_times * threshold)\n", + " bound = 0\n", + " pre_sum = 0\n", + " for i in range(len(appear_list)):\n", + " pre_sum += appear_list[i]\n", + " if pre_sum > useful_bound:\n", + " bound = appear_list[i]\n", + " break\n", + " for i in food_tag_dict.keys():\n", + " if food_tag_dict[i] > bound:\n", + " useful_tag_list.append(i)\n", + " counter = 0\n", + " for i in useful_tag_list:\n", + " useful_dict[i] = counter\n", + " counter += 1\n", + "\n", + "\n", + "def get_correlation():\n", + " global train_raw_food, correlation_dict\n", + " food_tag_dict = count_dict(train_raw_food)\n", + " merged_dict = count_dict(train_raw_non_food)\n", + "\n", + " for i in food_tag_dict.keys():\n", + " if i not in merged_dict.keys():\n", + " merged_dict[i] = food_tag_dict[i]\n", + " else:\n", + " merged_dict[i] += food_tag_dict[i]\n", + "\n", + " for i in food_tag_dict.keys():\n", + " if i not in correlation_dict.keys():\n", + " # correlation_dict[i] = food_tag_dict[i] / len(food_raw_data)\n", + " correlation_dict[i] = food_tag_dict[i] / len(train_raw_food)\n", + " # correlation_dict[i] = food_tag_dict[i] / merged_dict[i]\n", + " else:\n", + " print(\"error in get correlation function\")\n", + "\n", + "\n", + "def construct_train_test_set():\n", + " global train_raw_food, train_raw_non_food, test_raw_food, \\\n", + " test_raw_non_food, train_vector_x, train_vector_y, \\\n", + " test_vector_x, test_vector_y, train_vector_num\n", + " train_vector_x, train_vector_y, train_vector_num = [], [], []\n", + " test_vector_x, test_vector_y = [], []\n", + " vector_x = []\n", + " vector_y = []\n", + " for i in train_raw_food:\n", + " tmp = [0 for i in range(len(useful_tag_list))]\n", + " for j in range(0, len(i) - 1, 2):\n", + " if i[j] in useful_dict.keys():\n", + " tmp[useful_dict[i[j]]] = float(\n", + " i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1]))\n", + " else:\n", + " # TODO: should be changed to random probability\n", + " pass\n", + " vector_x.append(tmp)\n", + " vector_y.append(\"food\")\n", + " train_vector_num.append(1)\n", + "\n", + " for i in train_raw_non_food:\n", + " tmp = [0 for i in range(len(useful_tag_list))]\n", + " for j in range(0, len(i) - 1, 2):\n", + " if i[j] in useful_dict.keys():\n", + " tmp[useful_dict[i[j]]] = float(\n", + " i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1]))\n", + " else:\n", + " # TODO: should be changed to random probability\n", + " pass\n", + " vector_x.append(tmp)\n", + " vector_y.append(\"no food\")\n", + " train_vector_num.append(-1)\n", + "\n", + " train_vector_x, train_vector_y = vector_x, vector_y\n", + "\n", + " vector_x, vector_y = [], []\n", + "\n", + " for i in test_raw_food:\n", + " tmp = [0 for i in range(len(useful_tag_list))]\n", + " for j in range(0, len(i) - 1, 2):\n", + " if i[j] in useful_dict.keys():\n", + " tmp[useful_dict[i[j]]] = float(\n", + " i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1]))\n", + " else:\n", + " # TODO: should be changed to random probability\n", + " pass\n", + " vector_x.append(tmp)\n", + " vector_y.append(\"food\")\n", + "\n", + " for i in test_raw_non_food:\n", + " tmp = [0 for i in range(len(useful_tag_list))]\n", + " for j in range(0, len(i) - 1, 2):\n", + " if i[j] in useful_dict.keys():\n", + " tmp[useful_dict[i[j]]] = float(\n", + " i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1]))\n", + " else:\n", + " # TODO: should be changed to random probability\n", + " pass\n", + " vector_x.append(tmp)\n", + " vector_y.append(\"no food\")\n", + " test_vector_x, test_vector_y = vector_x, vector_y\n", + "\n", + "\n", + "def confision_matrix(ground_true, predict, print_result=False):\n", + " TP, FP, FN, TN = 0, 0, 0, 0\n", + " for i in range(len(ground_true)):\n", + " if ground_true[i] == \"food\" and predict[i] == \"food\":\n", + " TP += 1\n", + " elif ground_true[i] == \"no food\" and predict[i] == \"food\":\n", + " FP += 1\n", + " elif ground_true[i] == \"food\" and predict[i] == \"no food\":\n", + " FN += 1\n", + " elif ground_true[i] == \"no food\" and predict[i] == \"no food\":\n", + " TN += 1\n", + " TPR = TP / (TP + FN)\n", + " FPR = FP / (FP + TN)\n", + " if TP + FP != 0:\n", + " PPV = TP/(TP + FP) # precision\n", + " else:\n", + " PPV = 0\n", + " if print_result:\n", + " print(\"TP: \", TP, \"FN: \", FN, \"TN: \", TN, \"FP :\", FP)\n", + " print(TP, \" \", FN, \" \", TN, \" \", FP)\n", + " print(\"Sensitivity = \", TP/(TP+FN), end=\" \")\n", + " print(\"Specificity = \", TN/(TN+FP), \"burden: \", (TP + FP) / (TP+TN+FN+FP) )\n", + " # print(\"Precision = \", TP/(TP+FP), end=\" \")\n", + " # print(\"Accuracy = \", (TP + TN)/(TP+TN+FN+FP))\n", + " return TPR, FPR, PPV\n", + "\n", + "\n", + "def clarifai_result():\n", + " global test_raw_food, test_raw_non_food\n", + " TPR_list = []\n", + " FPR_list = []\n", + " for k in range(10):\n", + " TP, FP, FN, TN = 0, 0, 0, 0\n", + " ratio = k / 10\n", + " # print(ratio)\n", + " for i in test_raw_food:\n", + " have = False\n", + " for j in range(len(i)):\n", + " if i[j] == \"food\" and float(i[j + 1]) > ratio:\n", + " have = True\n", + " if not have:\n", + " FN += 1\n", + " else:\n", + " TP += 1\n", + "\n", + " for i in test_raw_non_food:\n", + " have = False\n", + " for j in range(len(i)):\n", + " if i[j] == \"food\" and float(i[j + 1]) > ratio:\n", + " have = True\n", + " if not have:\n", + " TN += 1\n", + " else:\n", + " FP += 1\n", + "\n", + " TPR = TP / (TP + FN)\n", + " FPR = FP / (FP + TN)\n", + " TPR_list.append(TPR)\n", + " FPR_list.append(FPR)\n", + " #if k == 9 or k == 8 or k == 7 or k ==6:\n", + " if k == 7 or k == 6 or k == 5 or k == 4:\n", + "# plt.scatter([FPR], [TPR], marker='o', c='green')\n", + " print(\"TP: \", TP, \"FN: \", FN, \"TN: \", TN, \"FP :\", FP, \"Sensitivity: \",TP / (TP + FN),\"Specificity: \",TN / (TN + FP),\"burden: \", (TP + FP) / (TP+TN+FN+FP) )\n", + " print(TP, \", \", FN, \", \", TN, \", \", FP)\n", + " \n", + " # print(\"TRP :\", TPR)\n", + " # print(\"FPR :\", FPR)\n", + " return TPR_list, FPR_list\n", + " #\n", + " # print(\"clarify result**********\")\n", + " # print(\"TP: \", TP, \"FN: \", FN, \"TN: \", TN, \"FP :\", FP)\n", + " # print(\"Sensitivity = \", TP / (TP + FN), end=\" \")\n", + " # print(\"Specificity = \", TN / (TN + FP))\n", + " # print(\"Precision = \", TP / (TP + FP), end=\" \")\n", + " # print(\"Accuracy = \", (TP + TN) / (TP + TN + FN + FP))\n", + " # print(\"burden = \", (TP + FP) / (TP +TN+FN+FP))\n", + " # plt.scatter([1 - 0.789866667, 1 - 0.684, 1 - 0.55786, 1-0.4512], [0.584493042, 0.666003976, 0.753479125, 0.833664679], marker='o', c='green')\n", + "\n", + "\n", + "def get_p_food_before_balance():\n", + " # food_num_ori = 0\n", + " # with open(\"#food_ori.csv\") as f:\n", + " # csv_reader = csv.reader(f)\n", + " # for row in csv_reader:\n", + " # food_num_ori += 1\n", + "\n", + " # no_food_num_ori = 0\n", + " # with open(\"#no food_ori.csv\") as f:\n", + " # csv_reader = csv.reader(f)\n", + " # for row in csv_reader:\n", + " # no_food_num_ori += 1\n", + " # p_food = food_num_ori / no_food_num_ori\n", + " # return p_food\n", + " # p_food = len(train_raw_food) / (len(train_raw_food) + len(train_raw_non_food))\n", + " # return p_food\n", + " pass\n", + "\n", + "\n", + "def init(use_all=True):\n", + " global p_food, train_raw_food, train_raw_non_food\n", + " get_raw_data()\n", + " shuffle_raw_data()\n", + " div_train_test_raw_data(0.75)\n", + " save_raw_data_train_test()\n", + " clarifai_result()\n", + " p_food = len(train_raw_food) / (len(train_raw_food) + len(train_raw_non_food))\n", + " get_correlation()\n", + " get_use_tag(use_all)\n", + " construct_train_test_set()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": false, + "tags": [] + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "6182\n", + "TP: 1756 FN: 305 TN: 6491 FP : 6310 Sensitivity: 0.8520135856380397 Specificity: 0.5070697601749863 burden: 0.5427264163638811\n", + "1756 , 305 , 6491 , 6310\n", + "TP: 1630 FN: 431 TN: 7929 FP : 4872 Sensitivity: 0.7908782144590005 Specificity: 0.6194047340051558 burden: 0.4374915892881173\n", + "1630 , 431 , 7929 , 4872\n", + "TP: 1474 FN: 587 TN: 9281 FP : 3520 Sensitivity: 0.7151868025230471 Specificity: 0.7250214826966643 burden: 0.33602476113578256\n", + "1474 , 587 , 9281 , 3520\n", + "TP: 1296 FN: 765 TN: 10400 FP : 2401 Sensitivity: 0.62882096069869 Specificity: 0.812436528396219 burden: 0.24875521464136724\n", + "1296 , 765 , 10400 , 2401\n", + "init done\n" + ] + } + ], + "source": [ + "# init training data testing data and required parameter\n", + "\n", + "# p_food, train_raw_food, train_raw_non_food\n", + "# get_raw_data()\n", + "# shuffle_raw_data()\n", + "# div_train_test_raw_data(0.75)\n", + "# save_raw_data_train_test()\n", + "\n", + "get_raw_data_seperate(test_total = False)\n", + "print(len(train_raw_food))\n", + "\n", + "TPR_list, FPR_list = clarifai_result()\n", + "p_food = len(train_raw_food) / (len(train_raw_food) + len(train_raw_non_food))\n", + "get_correlation()\n", + "get_use_tag(use_all = True)\n", + "construct_train_test_set()\n", + "print('init done')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "trainning svm kernel done\n" + ] + } + ], + "source": [ + "clf = svm.SVC(kernel='poly') # order = 3(default)\n", + "clf.fit(train_vector_x, train_vector_y)\n", + "print('trainning svm kernel done')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n\n food 0.90 0.92 0.91 6182\n no food 0.91 0.90 0.90 6182\n\n accuracy 0.91 12364\n macro avg 0.91 0.91 0.91 12364\nweighted avg 0.91 0.91 0.91 12364\n\nTP: 5663 FN: 519 TN: 5534 FP : 648\n5663 519 5534 648\nSensitivity = 0.9160465868650922 Specificity = 0.8951795535425429 burden: 0.5104335166612747\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(0.9160465868650922, 0.10482044645745714, 0.8973221359530977)" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "### svm training dataset report\n", + "svm_predict = clf.predict(train_vector_x)\n", + "print(classification_report(train_vector_y, svm_predict))\n", + "confision_matrix(train_vector_y, svm_predict, print_result=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": true + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "***********************testing set report ********************************\n", + "TP: 1750 FN: 311 TN: 10545 FP : 2256\n", + "1750 311 10545 2256\n", + "Sensitivity = 0.849102377486657 Specificity = 0.8237637684555894 burden: 0.26954649441528733\n" + ] + } + ], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "print('***********************testing set report ********************************')\n", + "svm_predict = clf.predict(test_vector_x)\n", + "TPR, FPR, PPV = confision_matrix(test_vector_y, svm_predict, print_result=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['food' 'food' 'food' ... 'no food' 'no food' 'no food']\n", + "2061\n", + "12801\n", + "food\n", + "no food\n", + "done\n" + ] + } + ], + "source": [ + "# get FP and FN\n", + "# print(test_vector_y)\n", + "print(svm_predict)\n", + "print(len(test_raw_food))\n", + "print(len(test_raw_non_food))\n", + "print(test_vector_y[2060])\n", + "print(test_vector_y[2061])\n", + "\n", + "FN = []\n", + "FP = []\n", + "for i in range(len(svm_predict)):\n", + " if svm_predict[i] != test_vector_y[i]:\n", + " if i < 2061:\n", + " FN.append(i)\n", + " else:\n", + " FP.append(i-len(test_raw_food))\n", + "# print(FN)\n", + "# print(FP)\n", + "test_raw_food, test_raw_non_food = [], []\n", + "with open(\"test_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " test_raw_food.append(row)\n", + "\n", + "with open(\"test_non_food.csv\") as f:\n", + " csv_reader = csv.reader(f)\n", + " for row in csv_reader:\n", + " test_raw_non_food.append(row)\n", + "with open(\"FN.txt\", 'w') as f:\n", + " for i in FN:\n", + " f.write(test_raw_food[i][0]+\"\\n\")\n", + "with open(\"./FP.txt\", 'w') as f:\n", + " for i in FP:\n", + " f.write(test_raw_non_food[i][0]+\"\\n\")\n", + "print(\"done\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-02-11T07:45:40.333331\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEKCAYAAAAMzhLIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAfWklEQVR4nO3dfZRU1Znv8e8voC7fRSHEgIwEMYiKqK2AySiYeIGYqNw4Gs0kmjdCgPiSdRPJRCeZJDPoyowhLluMyzdMjI6KCndiRG8SwQRRMSIq3HiJGhrQEdAliVGh8bl/nOruouiuOl3Uqa6q/n3WqlV1Xmv3WXCe2vvZex9FBGZmZl15X08XwMzMapsDhZmZFeVAYWZmRTlQmJlZUQ4UZmZWlAOFmZkVlVmgkHSzpNckPdfFdkm6RtIaSSslHZdVWczMrHxZ1ihuBSYV2T4ZGJ57TQXmZlgWMzMrU2aBIiKWAK8X2eVM4LZILAMOkHRwVuUxM7Py9O3B7x4EtOQtr8ute6VwR0lTSWod7L333sePGDGiKgU0MyvH1q2wcSMMGAC7755+264c29m29evh1VfhAx+AV199alNEDCjrD4qIzF7AocBzXWz7JfDRvOVfA8eXOufxxx8fZmbdtXZtxGWXJe+V2FbsmMsui4CIWbO6t21Xju1s29q1yfLatRHA8ij3Xl7ugalOXjxQ/BQ4L2/5j8DBpc7pQGHWu5V7w6/kDbjUMfk36O5s25VjS523XgPF6cCvAAFjgSfSnNOBwqzxdHaD7+qmX+4Nv9I34FI35lpTk4ECuIMk37CNJP/wJWAaMC23XUAz8CfgWaApzXkdKMxqT9rmme7c/Lu66e/Kr+rerCYDRVYvBwqz6it1w0/bPNOdm79v+pW1K4GiJ3s9mVmNaGmB5maYMQMOOWTn5eZmuOoqkGD27J2XZ8xIPk+fvuN5C9d3td8hhyTnKbXOeki5EaanXq5RmJWnWK2g1C//wl/3/rVff3CNwsx2pVZQ6pd/4a97/9rvXRwozOpA/k0fOv/c3eah/GUHAivGgcKsh3U3CER0/tm1AsuKA4VZD8gPDuUEgc4+OxBYVhwozDLWWY1hyxaYO7f8INDVZ7MsKEmG14+mpqZYvnx5TxfDrFPFgsKsWR21hOnTYb/9OoKAWdYkPRURTeUc6xqFWZlK1RTyg8KsWZ3XEszqgQOFWUptgWHKFLjvvvKDgpuKrN74mdk17u233+aUU05h+/btAMybN4/hw4czfPhw5s2b1+kxS5Ys4bjjjqNv377cc889O2z71re+xZFHHskRRxzBRRddRKmmx3fffZdzzz2Xww47jDFjxvDyyy93ut8dd9zB0UcfzahRo5g0aRKbNm0C4NZbb2XAgAGMHj2a0aNHc+ONNwKwceNGJk0q9gDEntPSktzsH398x/fZs5NgcMklHUnmtqAwY0byuW2/Qw7pyCu45mB1r9yRej316m0js6+99tqYM2dORERs3rw5hg4dGps3b47XX389hg4dGq+//vpOx7z00kvxzDPPxOc+97m4++6729f//ve/j5NOOilaW1ujtbU1xo4dG7/97W+Lfn9zc3N89atfjYiIO+64I84555yd9tm2bVsMGDAgNm7cGBER3/zmN+O73/1uRETccsstMWPGjE7PfeGFF8bvfve7ktcgK20jkpct2/H9a19LRiWPHbvj+/TpyWjkZcs8KtnqDx6Z3bhuv/12fvGLXwCwaNEiTjvtNA488EAATjvtNB588EHOO++8HY459NBDAXjf+3asMErinXfeYevWrUQE27ZtY+DAgUW/f8GCBXzve98D4Oyzz2bmzJlEBJLa92n7x/TWW29x0EEHsWXLFg477LCSf9tZZ53F7bffzkc+8pGS+1ZCV01HixfDsmUd723NR2edBfff3/Ge34Q0ZkxVimxWExwoatjWrVt58cUX22/869ev55C8dozBgwezfv361OcbN24cEyZM4OCDDyYimDlzJkcccUTRY/K/s2/fvuy///5s3ryZ/v37t++z2267MXfuXI4++mj23ntvhg8fTnNzc/v2+fPns2TJEg4//HB+/OMft5+vqamJyy+/PHX5y9UWIAoDQ3cCggOD9WbOUdSwTZs2ccABB7QvRyf5hPxf9qWsWbOG1atXs27dOtavX89vfvMblixZUvSYNN+5bds25s6dy9NPP82GDRsYNWoUs3MZ20996lO8/PLLrFy5ko9//ONccMEF7ce9//3vZ8OGDanLn0Zn+YV/+Icdcwpz5uyYTxgzZsd35xTMduQaRQ3bc889eeedd9qXBw8ezCOPPNK+vG7dOsaPH5/6fPfddx9jx45ln332AWDy5MksW7aMk08+uctjBg8eTEtLC4MHD6a1tZU333yzvemrzYoVKwAYNmwYAOeccw5XXnklAAcddFD7fl/5yle47LLL2pffeecd9txzz9TlL1TYlDRlClx8cRIc8puTHn8cxo5NAoObjsy6zzWKGtavXz+2b9/eHiwmTpzIQw89xBtvvMEbb7zBQw89xMSJE1Ofb8iQISxevJjW1la2bdvG4sWL25uevv3tb3PfffftdMwZZ5zR3rvqnnvu4dRTT92pRjFo0CBWrVrFxo0bAXj44Yfbz/vKK6+077dw4cIdmrpeeOEFjjrqqNTlhx1rDG01hbZeSJdc0hEU2moNbe933eWaglnZys2C99Srt/V6+uIXvxgPP/xw+/JNN90Uw4YNi2HDhsXNN9/cvv6KK66IBQsWRETEE088EYMGDYq99torDjzwwBg5cmRERLS2tsbUqVNjxIgRccQRR8Sll17afvzpp58eS5cu3en733777Tj77LNj2LBhccIJJ8Sf/vSn9m3HHHNM++e5c+fGiBEj4uijj45PfvKTsWnTpoiImDVrVowcOTJGjRoV48ePj9WrV7cf86Mf/SiuueaaLv/2tWuTHkhf+1rxHkltvZDcG8msa+xCrydP4VHjnn76aa6++mp+9rOfZfo9EydOZNGiRZl+R6GTTz6ZBQsW0K9fPyCpLbQNRrvggo5mJEhqCW0J6P326zzxbGZd8xQeDezYY49lwoQJbN++nT59+mT2PdUOEitWbKR//2/wwgv9yB83OHdu8v7000mQOPZYGDcOPv95d1E16ymuUVhVtdUaHnsMVqzoqCnAjs9R7iwwmFn5XKOwmpbfO6mwOWnOHLjttmQ5v1cSuMZgViscKCwTnQWHtq6qbc1JbYHBAcGstjlQWMUUCw5ttQc3J5nVHweKWvXmm3DSSbB0Key/f0+XpkttOYe//hWefTbJOxQLDq49mNUfB4pa9ctfwqpV8MADUDDpXy0oTEq3cXAwazzu9VRrzj8fFi6Ed9+F1lbo2xf22APOOANys8j2lK6S0sceC0cdBfvuu3NC2sxqg3s9NZLvfz/5if7yy0mg2G03+Lu/gx/8oEeK052ktJk1JgeKWnPYYUmwOO882HvvpGbxL/8CuQn3qqWwaclJabPey4GiFt11VxIkrrgiqUncfTecfXamX5lfc5g3b8fcg/MOZr2bcxS16MknYcgQGDgQ/vu/k7t4U1lNiyUVGyntpiWzxuEcRaM54YSOzwMHJq8MtLQkU3WnHSltZr2TA0UvVFiL8EhpMyvGgaKX6awW4Yf6mFkxDhS9SH6QcP7BzNLKNFBImgT8BOgD3BgRVxZs3x/4OTAkV5Z/j4hbsixTb9XSkozZa2tqWrDAAcLM0snsmdmS+gDNwGRgJHCepJEFu80AVkXEMcB44D8k7Z5VmXqrtppEW3fXceMcJMwsvSxrFCcCayLiRQBJdwJnAqvy9glgX0kC9gFeB1ozLFOvkD9R3z77JOsKm5vMzNLKMlAMAlryltcBhf1prgUWAhuAfYFzI+K9whNJmgpMBRgyZEgmhW0ks2d3PFIUkkFys2Z5JLWZlSfLQKFO1hWO7psIrABOBYYBD0t6NCK27HBQxA3ADZAMuKt8URtDW01i8eJk+cgj4ZRTnLA2s12TZaBYB+TfngaT1BzyfQG4MpLh4WskvQSMAJ7IsFwNp7Mpv93t1cwqJctA8SQwXNJQYD3wGeD8gn3WAh8DHpU0EPgw8GKGZWo4heMi3O3VzCots0AREa2SZgKLSLrH3hwRz0ualtt+PfAD4FZJz5I0VV0WEZuyKlOj8bgIM6uGTMdRRMQDwAMF667P+7wB+B9ZlqERPf44TJsG27bB88+7mcnMsuWR2XWkLRcxfz689lqyzkHCzLLmQFEnCnMRAwfCxInwwx86SJhZthwo6oBzEWbWkxwoalhnDxVyM5OZVZsDRY3ydOBmViscKGpMqYcKmZlVmwNFjcmfp8m1CDOrBZlNM27d19KS1CQgqUk4SJhZLXCgqAEtLcnMrm0PFho71g8WMrPa4aanGuDmJjOrZa5R9KC2mkTbtOBubjKzWuQaRQ9x91czqxcOFD2gpaUjH+GHC5lZrXOgqLK2mkTbA4ZOOQWam3u0SGZmRTlQVFFXczaZmdUyB4oqmj27I0i4+6uZ1Qv3eqqCwt5N48Y5SJhZ/XCNImOd9W5yc5OZ1RMHigz5ORJm1ggcKDKSHyQ8RsLM6plzFBlwkDCzRuJAkYH83k0OEmZW7xwoKix/qnD3bjKzRuBAUUH5o67du8nMGoUDRYU4L2FmjcqBogIcJMyskTlQVEBzs4OEmTUuB4pd1NICW7YkU3Q4SJhZI3Kg2AWPPw5jxiSPMd1vPwcJM2tMDhRlammBKVPglVfggx9MahRmZo3IgaJMzc0dQeLee12bMLPG5UBRhvy8xLJlSfOTmVmjcqAoQ3Oz8xJm1ns4UJRhypSkK+xZZ/V0SczMspdpoJA0SdIfJa2R1OmEFpLGS1oh6XlJi7MsTyW0tMDFFydNTvff39OlMTPLXmbPo5DUB2gGTgPWAU9KWhgRq/L2OQC4DpgUEWslvT+r8lRC4Qhs93Qys94gVY1C0nxJp0vqTg3kRGBNRLwYEVuBO4EzC/Y5H7g3ItYCRMRr3Th/1XkEtpn1Rmlv/HNJbur/T9KVkkakOGYQ0JK3vC63Lt/hQD9Jj0h6StLnOzuRpKmSlktavnHjxpRFriyPwDaz3ipVoIiI/xMRnwWOA14GHpa0VNIXJO3WxWHq7FQFy32B44HTgYnAFZIO7+T7b4iIpohoGjBgQJoiV1Rbk5N7OplZb5S6KUnSQcCFwJeBp4GfkASOh7s4ZB2Qf0sdDGzoZJ8HI+KtiNgELAGOSVumanBewsx6u7Q5inuBR4G9gE9FxBkR8Z8R8XVgny4OexIYLmmopN2BzwALC/ZZAPy9pL6S9gLGAKvL+UOy4ryEmfV2aXs93RgRD+SvkLRHRLwbEU2dHRARrZJmAouAPsDNEfG8pGm57ddHxGpJDwIrgfdy3/Nc2X9NheXnJWbNcpAws95JEYVpg052kv4QEceVWlcNTU1NsXz58qp81/TpSV5i+vSkZmFmVq8kPdXVD/tSitYoJH2ApKfSnpKOpSNBvR9JM5SZmTW4Uk1PE0kS2IOBq/PW/wX4p4zKVBNach1725qdzMx6q6KBIiLmAfMkfToi5lepTD0uv6eTcxNm1tuVanr6x4j4OXCopG8Ubo+Iqzs5rK65O6yZ2Y5KNT3tnXvvqgtsw3F3WDOzHZVqevpp7uN1EdEzc2dUkbvDmpntLO3I7KWSHpL0JUn9Mi1RD5o9O+kOCw4SZmZt0s71NBy4HDgSeErSf0n6x0xLZmZmNSH1XE8R8UREfINk+vDXgXmZlaoHuDusmVnn0s71tJ+kCyT9ClgKvEISMBqCZ4c1M+ta2rmengHuB74fEY9lV5yekd/Tyd1hzcx2lDZQfCjSTApVh9zTycysuFID7uZExCXAQkk7BYqIOCOrglWDR2CbmZVWqkbxs9z7v2ddkJ7gJiczs9KKJrMj4qncx9ERsTj/BYzOvHQZ8jOwzczSSds99oJO1l1YwXJUXXOzezmZmaVRKkdxHnA+MFRS/mNM9wU2Z1mwLOXXJtzkZGZWXKkcRduYif7Af+St/wvJ40vrUlttwglsM7PSSk0K+Gfgz8C46hSnOqZMgcWL4ayzerokZma1r2iOQtLvcu9/kbQl7/UXSVuqU8TKu+8+WLYM7r+/p0tiZlb7StUoPpp737c6xcme8xNmZt2Tdq6nYZL2yH0eL+kiSQdkWrKMuLeTmVn3pO0eOx/YLukw4CZgKPCLzEqVEdcmzMy6L22geC8iWoEpwJyIuBQ4OLtiZcO1CTOz7ks7KeC23JiKC4BP5dbtlk2RsjNjBkiuTZiZdUfaGsUXSLrI/mtEvCRpKPDz7IpVeS0tSY1i+nTXJszMuiNVjSIiVgEX5S2/BFyZVaGy0NwMV12V1Chmz+7p0piZ1Y9UgULSR4DvAX+XO0ZARMSHsitaZbnZycysPGlzFDcBlwJPAduzK062GvPRS2Zm2UobKN6MiF9lWpKMuenJzKw8aQPFbyX9CLgXeLdtZUT8IZNSZcBNT2Zm5UkbKMbk3pvy1gVwamWLk422Hk8zZrjHk5lZd6Xt9TQh64Jkyc1OZmblS9vraSDwb8AHI2KypJHAuIi4KdPSVYibnczMypd2wN2twCLgg7nlF4BLMihPxXmgnZnZrkkbKPpHxF3AewC5eZ9KdpOVNEnSHyWtkTSryH4nSNou6eyU5UmtrdnpuusqfWYzs94hbaB4S9JBJAlsJI0F3ix2gKQ+QDMwGRgJnJdrsupsv6tIaiwV5dlizcx2XdpA8Q1gITBM0u+B24CvlzjmRGBNRLwYEVuBO4EzO9nv6yTTmL+WsiypebZYM7NdVzSZLekEoCUi/iDpFOCrwKeBh4B1Jc49CGjJW15HRzfbtvMPIpm6/FTghCLlmApMBRgyZEiJr+3gJLaZ2a4rVaP4KbA19/kk4DskzUlvADeUOFadrCucRGMOcFlEFM13RMQNEdEUEU0DBgwo8bUJJ7HNzCqjVPfYPhHxeu7zucANETEfmC9pRYlj1wH5t+jBwIaCfZqAOyUB9Ac+Iak1Iu5PUfaiPHbCzKwySgYKSX1zvZw+Rq75J+WxTwLDc8+uWA98Bjg/f4eIGNr2WdKtwH9VIkiAm53MzCql1M3+DmCxpE3A28CjALlnZxft9RQRrZJmkvRm6gPcHBHPS5qW2379rha+FM8Wa2a264oGioj4V0m/Jnk+9kMR7bfe91G61xMR8QDwQMG6TgNERFyYpsBpuenJzKwySk7hERHLOln3QjbFqRw3PZmZVUbacRR1xT2ezMwqpyEDhaftMDOrnLTPo6grbnYyM6uchgwUhxziBLaZWaU0XNNTSwvMmpW8m5nZrmu4QOH8hJlZZTVc05PzE2ZmldVwNQrwiGwzs0pquEDhpiczs8py05OZmRXVcIHCXWPNzCqroZqe3DXWzKzyGipQOD9hZlZ5DdX05PyEmVnlNVSgcH7CzKzyGqbpyfkJM7NsNEygcH7CzCwbDdP05PyEmVk2GqJG4SfamZllpyEChZudzMyy0xBNT252MjPLTkMECneLNTPLTkM0PZmZWXYaIlB4DIWZWXYaIlA4mW1mlp2GyFE4mW1mlp2GCBROZpuZZachmp7MzCw7dR8onMg2M8tW3QcKJ7LNzLJV9zkKJ7LNzLJV94HCiWwzs2zVddOT8xNmZtnLNFBImiTpj5LWSJrVyfbPSlqZey2VdEx3zu/8hJlZ9jJrepLUB2gGTgPWAU9KWhgRq/J2ewk4JSLekDQZuAEYk/Y7nJ8wM8teljWKE4E1EfFiRGwF7gTOzN8hIpZGxBu5xWXA4O5+ScQul9PMzIrIMlAMAvKzB+ty67ryJeBXnW2QNFXScknLN27c2L7eTU9mZtnLsteTOlnX6e9/SRNIAsVHO9seETeQNEvR1NTUfg43PZmZZS/LQLEOyH+C9WBgQ+FOkkYBNwKTI2Jzd77AXWPNzLKXZdPTk8BwSUMl7Q58BliYv4OkIcC9wOci4oUMy2JmZmXKLFBERCswE1gErAbuiojnJU2TNC232z8DBwHXSVohaXna83sMhZlZdWQ6MjsiHgAeKFh3fd7nLwNfLufcbYlsyc1PZmZZqtspPJzINjOrjroNFE5km5lVR13P9WRmZtmry0DhRLaZWfXUZaDwiGwzs+qpyxyFE9lmZtVTl4HCiWwzs+qpy6YnMzOrHgcKMzMrqi4DhXs9mZlVT10GCvd6MjOrnrpMZrvXk5lZ9dRloHCvJzOz6qnLpiczM6ueugwUTmabmVVPXQYKJ7PNzKqnLnMUTmabmVVPXQYKJ7PNzKqn7pqetm51fsLMrJrqLlBs3Oj8hJlZNdVd09OAAXDhhc5PmJlVS90Fit13d37CzKya6q7pyTkKM7PqqrtA4RyFmVl11V3Tk3MUZmbVVXeBwjkKM7PqqrumJzMzq666CxROZpuZVVfdBQons83MqqvuchROZpuZVVfdBQons83Mqqvump7Wr3d+wsysmuouULz6qvMTZmbVVHeBYu+94ayzeroUZma9R90Firfegvvv7+lSmJn1HpkGCkmTJP1R0hpJszrZLknX5LavlHRcqXN+4APu8WRmVk2ZBQpJfYBmYDIwEjhP0siC3SYDw3OvqcDcrMpjZmblybJGcSKwJiJejIitwJ3AmQX7nAncFollwAGSDi520ldfhcsvz6bAZma2syzHUQwC8juyrgPGpNhnEPBK/k6SppLUOIA9uO22I/92222rVle4vPWoP7CppwtRI3wtOvhadPC16PDhcg/MMlCok3VRxj5ExA3ADQCSlkc837Trxat/ybUIXwt8LfL5WnTwteggaXm5x2bZ9LQOOCRveTCwoYx9zMysB2UZKJ4EhksaKml34DPAwoJ9FgKfz/V+Ggu8GRGvFJ7IzMx6TmZNTxHRKmkmsAjoA9wcEc9Lmpbbfj3wAPAJYA3wN+ALKU59Q0ZFrke+Fh18LTr4WnTwtehQ9rVQxE4pATMzs3Z1NzLbzMyqy4HCzMyKqtlAkcX0H/UqxbX4bO4arJS0VNIxPVHOaih1LfL2O0HSdklnV7N81ZTmWkgaL2mFpOclLa52Gaslxf+R/SX9b0nP5K5Fmnxo3ZF0s6TXJD3Xxfby7psRUXMvkuT3n4APAbsDzwAjC/b5BPArkrEYY4HHe7rcPXgtTgL65T5P7s3XIm+/35B0lji7p8vdg/8uDgBWAUNyy+/v6XL34LX4J+Cq3OcBwOvA7j1d9gyuxcnAccBzXWwv675ZqzWKTKb/qFMlr0VELI2IN3KLy0jGozSiNP8uAL4OzAdeq2bhqizNtTgfuDci1gJERKNejzTXIoB9JQnYhyRQtFa3mNmLiCUkf1tXyrpv1mqg6Gpqj+7u0wi6+3d+ieQXQyMqeS0kDQKmANdXsVw9Ic2/i8OBfpIekfSUpM9XrXTVleZaXAscQTKg91ng4oh4rzrFqyll3Tdr9ZnZFZv+owGk/jslTSAJFB/NtEQ9J821mANcFhHbkx+PDSvNtegLHA98DNgTeEzSsoh4IevCVVmaazERWAGcCgwDHpb0aERsybhstaas+2atBgpP/9Eh1d8paRRwIzA5IjZXqWzVluZaNAF35oJEf+ATkloj4v6qlLB60v4f2RQRbwFvSVoCHAM0WqBIcy2+AFwZSUP9GkkvASOAJ6pTxJpR1n2zVpuePP1Hh5LXQtIQ4F7gcw34azFfyWsREUMj4tCIOBS4B5jegEEC0v0fWQD8vaS+kvYimb25EWddTnMt1pLUrJA0kGQm1RerWsraUNZ9syZrFJHd9B91J+W1+GfgIOC63C/p1mjAGTNTXoteIc21iIjVkh4EVgLvATdGRKfdJutZyn8XPwBulfQsSfPLZRHRcNOPS7oDGA/0l7QO+C6wG+zafdNTeJiZWVG12vRkZmY1woHCzMyKcqAwM7OiHCjMzKwoBwozMyvKgcLqWqnZMss853dyM4yuzM28OqZS586d/wFJB+Q+XyRptaTbJZ1RbEbc3P5Lc++HSjq/kuUy64q7x1pdk3Qy8FeSic6OqsD5xgFXA+Mj4l1J/UlmGc1k1L+k/0symv6lbh43HvhfEfHJLMplls81CqtrKWbL7K6DSaa9eDd3/k1tQULSy5KukvRE7nVYbv0ASfMlPZl7fSS3fh9Jt0h6Nlc7+XTeefpLup5kauyFki6VdKGka3P7DJR0X+75Cc9IOim3/q+5cl5JMup6Re7YRyWNbvsjJP0+N62L2S5zoDDb0UPAIZJekHSdpFMKtm+JiBNJZiOdk1v3E+DHEXEC8GmSObcAriCZIuHoiBhF8oyMdhExjWSenQkR8eOC77kGWBwRx5A8X+D5gu2zgEcjYnTu2BuBCwEkHQ7sEREru//nm+3MgcIanqT/Kem5Tl6LCveNiL+SzLg6FdgI/KekC/N2uSPvfVzu88eBayWtIJlLZz9J++bWN+ed+w3SOxWYmztue0S8WWL/u4FPStoN+CJwaze+y6yompzryaySIuJekkkT0+6/HXgEeCQ3N9AFdNx485N6bZ/fB4yLiLfzz6Nk4q2qJAEj4m+SHiZ5MM05JLPomlWEaxRmeSR9WNLwvFWjgT/nLZ+b9/5Y7vNDwMy8c4zuYn2/bhTl18DXcsf1kbRfwfa/APsWrLuRpMnqyYioZN7GejkHCqtrudkyHwM+LGmdpC/t4in3AeZJWiVpJTAS+F7e9j0kPQ5cDFyaW3cR0JRLWK8CpuXW/5DkCXPPSXoGmNCNclwMTMjVaJ4CjizYvhJozSW6LwWIiKeALcAt3fges5LcPdYsJUkvA021Oj21pA+SNJmN6KWP+bSMuEZh1gCUPA/7ceA7DhJWaa5RmJlZUa5RmJlZUQ4UZmZWlAOFmZkV5UBhZmZFOVCYmVlR/x8oFyg49Zu+eAAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "##%\n", + "\n", + "# linera discriminant analysis\n", + "vector_x_np = np.array(train_vector_x)\n", + "vector_y_np = np.array(train_vector_num)\n", + "\n", + "\n", + "lda = LinearDiscriminantAnalysis().fit(vector_x_np, vector_y_np)\n", + "predict_y_prob = lda.predict_proba(test_vector_x)\n", + "predict_y = lda.predict(test_vector_x)\n", + "\n", + "lda = LinearDiscriminantAnalysis().fit(vector_x_np, vector_y_np)\n", + "predict_y_prob = lda.predict_proba(test_vector_x)\n", + "predict_y = lda.predict(test_vector_x)\n", + "# print(predict_y_prob)\n", + "# print(predict_y)\n", + "TPR_logistic, FPR_logistic = [], []\n", + "step = 200\n", + "for threshold in range(0,step):\n", + " predict_y_str = []\n", + " for i in predict_y_prob:\n", + " if i[0] >= threshold/step:\n", + " predict_y_str.append(\"no food\")\n", + " else:\n", + " predict_y_str.append(\"food\")\n", + " TPR_tmp, FPR_tmp, PPV_tmp = confision_matrix(test_vector_y, predict_y_str, print_result=False)\n", + " # TPR_logistic.append(PPV_tmp)\n", + " # FPR_logistic.append(TPR_tmp)\n", + " TPR_logistic.append(TPR_tmp)\n", + " FPR_logistic.append(FPR_tmp)\n", + "TPR_jia = [0.984958758, 0.899563319, 0.712760796, 0.463367297]\n", + "FPR_jia = [0.988125928, 0.930864776, 0.785094914, 0.573783298]\n", + "plt.scatter(FPR_logistic,TPR_logistic,marker='.',c='blue', s=5)\n", + "# plt.scatter(FPR_jia, TPR_jia, marker='.', c='#E68600')\n", + "plt.scatter([FPR], [TPR], marker='*', c='red')\n", + "# plt.scatter([TPR], [PPV], marker='*', c='red')\n", + "plt.text(FPR, TPR+0.05, '({:.2f}, {:.2f})'.format(FPR, TPR))\n", + "# plt.scatter(FPR_list, TPR_list, marker='.', c='green', s=0.5)\n", + "plt.xlabel('1 − Specificity')\n", + "plt.ylabel('Sensitivity')\n", + "\n", + "# plt.scatter([FPR], [TPR], marker='*', c='red')\n", + "# plt.scatter(FPR_list[4:8], TPR_list[4:8], marker='.', c='green')\n", + "# plt.plot([0, 1], [0, 1], linestyle='--', color='magenta')\n", + "plt.xlim(0, 1)\n", + "plt.ylim(0, 1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "#######################################\n", + "# this block is for logistic regreesion\n", + "#######################################\n", + "# reg = LogisticRegression(max_iter=300).fit(vector_x_np, vector_y_np)\n", + "# predict_y_prob = reg.predict_proba(test_vector_x)\n", + "# predict_y = reg.predict(test_vector_x)\n", + "# TPR_logistic, FPR_logistic = [], []\n", + "# step = 200\n", + "# for threshold in range(0,step):\n", + "# predict_y_str = []\n", + "# for i in predict_y_prob:\n", + "# if i[0] >= threshold/step:\n", + "# predict_y_str.append(\"no food\")\n", + "# else:\n", + "# predict_y_str.append(\"food\")\n", + "# TPR_tmp, FPR_tmp = confision_matrix(test_vector_y, predict_y_str, print_result=False)\n", + "# TPR_logistic.append(TPR_tmp)\n", + "# FPR_logistic.append(FPR_tmp)\n", + "\n", + "# plt.scatter(FPR_logistic,TPR_logistic,marker='.',c='blue')\n", + "\n", + "# plt.scatter([FPR], [TPR], marker='*', c='red')\n", + "# plt.scatter(FPR_list, TPR_list, marker='.', c='green', s=0.005)\n", + "\n", + "# plt.plot([0, 1], [0, 1], linestyle='--', color='magenta')\n", + "# plt.xlim(0, 1)\n", + "# plt.ylim(0, 1)\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "name": "pycharm-d87231c7", + "language": "python", + "display_name": "PyCharm (all_food_no_food.new)" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1-final" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/svm_nb_funcs.py b/svm_nb_funcs.py new file mode 100644 index 0000000..62b8b97 --- /dev/null +++ b/svm_nb_funcs.py @@ -0,0 +1,351 @@ +from sklearn import svm +import csv +from sklearn.utils import shuffle + +from sklearn.metrics import classification_report +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import LogisticRegression +import numpy as np +import matplotlib.pyplot as plt + +food_file_path, non_food_file_path = 'food.csv', 'no_food.csv' +# food_file_path2, non_food_file_path2 = 'food2.csv', 'no_food2.csv' +food_raw_data, non_food_raw_data = [], [] +# food_raw_data2, non_food_raw_data2 = [], [] + +train_raw_food, train_raw_non_food = [], [] +test_raw_food, test_raw_non_food = [], [] + +train_vector_x, train_vector_y, train_vector_num = [], [], [] +test_vector_x, test_vector_y = [], [] + +useful_tag_list, useful_dict = [], {} + +correlation_dict = {} + +p_food = 0 + + +def get_raw_data(): + global food_raw_data, non_food_raw_data + + with open(food_file_path) as f: + csv_reader = csv.reader(f) + for row in csv_reader: + food_raw_data.append(row) + with open(non_food_file_path) as f: + csv_reader = csv.reader(f) + for row in csv_reader: + non_food_raw_data.append(row) + + # with open(food_file_path2) as f: + # csv_reader = csv.reader(f) + # for row in csv_reader: + # food_raw_data2.append(row) + # with open(non_food_file_path2) as f: + # csv_reader = csv.reader(f) + # for row in csv_reader: + # non_food_raw_data2.append(row) + + +def shuffle_raw_data(): + global food_raw_data, non_food_raw_data + # non_food_raw_data = non_food_raw_data[:15000] + # non_food_raw_data = non_food_raw_data[:len(food_raw_data)] + food_raw_data = shuffle(food_raw_data) + non_food_raw_data = shuffle(non_food_raw_data) + # non_food_raw_data = non_food_raw_data[:15000] + non_food_raw_data = non_food_raw_data[:len(food_raw_data)] + # non_food_raw_data = non_food_raw_data[00000] + + + +def div_train_test_raw_data(ratio=0.75): + global food_raw_data, non_food_raw_data, train_raw_food, \ + train_raw_non_food, test_raw_food, test_raw_non_food, \ + food_raw_data2, non_food_raw_data2 + # remove some non_food_raw_data + # non_food_raw_data = non_food_raw_data[:10000] + train_food_len = int(len(food_raw_data) * ratio) + train_non_food_len = int(len(non_food_raw_data) * ratio) + train_raw_food = food_raw_data[0:train_food_len] + train_raw_non_food = non_food_raw_data[0:train_non_food_len] + test_raw_food = food_raw_data[train_food_len:] + test_raw_non_food = non_food_raw_data[train_non_food_len:] + + # train_raw_food = food_raw_data + # test_raw_food = food_raw_data2 + + # train_raw_non_food = non_food_raw_data + # test_raw_non_food = non_food_raw_data2 + +def save_raw_data_train_test(): + global food_raw_data, non_food_raw_data, train_raw_food, \ + train_raw_non_food, test_raw_food, test_raw_non_food + with open('train_food.csv', 'w') as f: + write = csv.writer(f) + write.writerows(train_raw_food) + with open('train_non_food.csv', 'w') as f: + write = csv.writer(f) + write.writerows(train_raw_non_food ) + + with open('test_food.csv', 'w') as f: + write = csv.writer(f) + write.writerows(test_raw_food ) + with open('test_non_food.csv', 'w') as f: + write = csv.writer(f) + write.writerows(test_raw_non_food) + + for i in train_raw_food: + i = i[1:] + for i in train_raw_non_food: + i = i[1:] + for i in test_raw_food: + i = i[1:] + for i in test_raw_non_food: + i = i[1:] + print(len(train_raw_food)) + + + + + +def count_dict(raw_data, threshold=0.5): + counter_dict = {} # only collect from train data + for i in raw_data: + for j in range(0, len(i) - 1, 2): + tmp = str(i[j]).strip() + if float(i[j + 1]) > threshold: + if tmp not in counter_dict: + counter_dict[tmp] = 1 + else: + counter_dict[tmp] += 1 + else: + # if tmp not in counter_dict: + # counter_dict[tmp] = 0 + pass + return counter_dict + + +def get_use_tag(use_all=False, threshold=0.5): + global useful_tag_list, food_raw_data, non_food_raw_data, useful_dict + useful_tag_list, useful_dict = [], {} + food_tag_dict = count_dict(train_raw_food) + non_food_tag_dict = count_dict(train_raw_non_food) + + if use_all: + for i in non_food_tag_dict.keys(): + if i not in food_tag_dict.keys(): + food_tag_dict[i] = non_food_tag_dict[i] + else: + food_tag_dict[i] += non_food_tag_dict[i] + # food_tag_dict.update(non_food_tag_dict) + + appear_times = 0 + appear_list = [] + for i in food_tag_dict.keys(): + appear_times += food_tag_dict[i] + appear_list.append(food_tag_dict[i]) + appear_list.sort(reverse=True) + useful_bound = int(appear_times * threshold) + bound = 0 + pre_sum = 0 + for i in range(len(appear_list)): + pre_sum += appear_list[i] + if pre_sum > useful_bound: + bound = appear_list[i] + break + for i in food_tag_dict.keys(): + if food_tag_dict[i] > bound: + useful_tag_list.append(i) + counter = 0 + for i in useful_tag_list: + useful_dict[i] = counter + counter += 1 + + +def get_correlation(): + global train_raw_food, correlation_dict + food_tag_dict = count_dict(train_raw_food) + merged_dict = count_dict(train_raw_non_food) + + for i in food_tag_dict.keys(): + if i not in merged_dict.keys(): + merged_dict[i] = food_tag_dict[i] + else: + merged_dict[i] += food_tag_dict[i] + + for i in food_tag_dict.keys(): + if i not in correlation_dict.keys(): + correlation_dict[i] = food_tag_dict[i] / len(food_raw_data) + # correlation_dict[i] = food_tag_dict[i] / merged_dict[i] + else: + print("error in get correlation function") + + +def construct_train_test_set(): + global train_raw_food, train_raw_non_food, test_raw_food, \ + test_raw_non_food, train_vector_x, train_vector_y, \ + test_vector_x, test_vector_y, train_vector_num + train_vector_x, train_vector_y, train_vector_num = [], [], [] + test_vector_x, test_vector_y = [], [] + vector_x = [] + vector_y = [] + for i in train_raw_food: + tmp = [0 for i in range(len(useful_tag_list))] + for j in range(0, len(i) - 1, 2): + if i[j] in useful_dict.keys(): + tmp[useful_dict[i[j]]] = float( + i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1])) + else: + # TODO: should be changed to random probability + pass + vector_x.append(tmp) + vector_y.append("food") + train_vector_num.append(1) + + for i in train_raw_non_food: + tmp = [0 for i in range(len(useful_tag_list))] + for j in range(0, len(i) - 1, 2): + if i[j] in useful_dict.keys(): + tmp[useful_dict[i[j]]] = float( + i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1])) + else: + # TODO: should be changed to random probability + pass + vector_x.append(tmp) + vector_y.append("no food") + train_vector_num.append(-1) + + train_vector_x, train_vector_y = vector_x, vector_y + + vector_x, vector_y = [], [] + + for i in test_raw_food: + tmp = [0 for i in range(len(useful_tag_list))] + for j in range(0, len(i) - 1, 2): + if i[j] in useful_dict.keys(): + tmp[useful_dict[i[j]]] = float( + i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1])) + else: + # TODO: should be changed to random probability + pass + vector_x.append(tmp) + vector_y.append("food") + + for i in test_raw_non_food: + tmp = [0 for i in range(len(useful_tag_list))] + for j in range(0, len(i) - 1, 2): + if i[j] in useful_dict.keys(): + tmp[useful_dict[i[j]]] = float( + i[j + 1]) * correlation_dict[i[j]] + p_food * (1 - float(i[j + 1])) + else: + # TODO: should be changed to random probability + pass + vector_x.append(tmp) + vector_y.append("no food") + test_vector_x, test_vector_y = vector_x, vector_y + + +def confision_matrix(ground_true, predict, print_result=False): + TP, FP, FN, TN = 0, 0, 0, 0 + for i in range(len(ground_true)): + if ground_true[i] == "food" and predict[i] == "food": + TP += 1 + elif ground_true[i] == "no food" and predict[i] == "food": + FP += 1 + elif ground_true[i] == "food" and predict[i] == "no food": + FN += 1 + elif ground_true[i] == "no food" and predict[i] == "no food": + TN += 1 + TPR = TP / (TP + FN) + FPR = FP / (FP + TN) + if print_result: + print("TP: ", TP, "FN: ", FN, "TN: ", TN, "FP :", FP) + # print("Sensitivity = ", TP/(TP+FN), end=" ") + # print("Specificity = ", TN/(TN+FP)) + # print("Precision = ", TP/(TP+FP), end=" ") + # print("Accuracy = ", (TP + TN)/(TP+TN+FN+FP)) + return TPR, FPR + + +def clarifai_result(): + global test_raw_food, test_raw_non_food + TPR_list = [] + FPR_list = [] + for k in range(10): + TP, FP, FN, TN = 0, 0, 0, 0 + ratio = k / 10 + # print(ratio) + for i in test_raw_food: + have = False + for j in range(len(i)): + if i[j] == "food" and float(i[j + 1]) > ratio: + have = True + if not have: + FN += 1 + else: + TP += 1 + + for i in test_raw_non_food: + have = False + for j in range(len(i)): + if i[j] == "food" and float(i[j + 1]) > ratio: + have = True + if not have: + TN += 1 + else: + FP += 1 + + TPR = TP / (TP + FN) + FPR = FP / (FP + TN) + TPR_list.append(TPR) + FPR_list.append(FPR) + if k == 9 or k == 8 or k == 7 or k ==6: + # if k == 7 or k == 6 or k == 5 or k == 4: + plt.scatter([FPR], [TPR], marker='o', c='green') + # print("TRP :", TPR) + # print("FPR :", FPR) + return TPR_list, FPR_list + # + # print("clarify result**********") + # print("TP: ", TP, "FN: ", FN, "TN: ", TN, "FP :", FP) + # print("Sensitivity = ", TP / (TP + FN), end=" ") + # print("Specificity = ", TN / (TN + FP)) + # print("Precision = ", TP / (TP + FP), end=" ") + # print("Accuracy = ", (TP + TN) / (TP + TN + FN + FP)) + # print("burden = ", (TP + FP) / (TP +TN+FN+FP)) + # plt.scatter([1 - 0.789866667, 1 - 0.684, 1 - 0.55786, 1-0.4512], [0.584493042, 0.666003976, 0.753479125, 0.833664679], marker='o', c='green') + + +def get_p_food_before_balance(): + # food_num_ori = 0 + # with open("#food_ori.csv") as f: + # csv_reader = csv.reader(f) + # for row in csv_reader: + # food_num_ori += 1 + + # no_food_num_ori = 0 + # with open("#no food_ori.csv") as f: + # csv_reader = csv.reader(f) + # for row in csv_reader: + # no_food_num_ori += 1 + # p_food = food_num_ori / no_food_num_ori + # return p_food + # p_food = len(train_raw_food) / (len(train_raw_food) + len(train_raw_non_food)) + # return p_food + pass + + +def init(use_all=True): + global p_food, train_raw_food, train_raw_non_food + get_raw_data() + shuffle_raw_data() + div_train_test_raw_data(0.75) + save_raw_data_train_test() + clarifai_result() + p_food = len(train_raw_food) / (len(train_raw_food) + len(train_raw_non_food)) + get_correlation() + get_use_tag(use_all) + construct_train_test_set() +