food_detection/get_dataset.ipynb
2021-02-11 13:34:23 -05:00

114 lines
3.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"from sklearn.utils import shuffle\n",
"food_raw_data, non_food_raw_data, food_rectify = [], [], []\n",
"food_data, non_food_data = [], []\n",
"\n",
"with open(\"food_rectify.csv\") as f:\n",
" reader = csv.reader(f)\n",
" for row in reader:\n",
" food_rectify.append(row)\n",
"\n",
"with open(\"food.csv\") as f:\n",
" reader = csv.reader(f)\n",
" for row in reader:\n",
" food_raw_data.append(row)\n",
" \n",
"with open(\"no_food.csv\") as f:\n",
" reader = csv.reader(f)\n",
" for row in reader:\n",
" non_food_raw_data.append(row)\n",
" \n",
"food_data = food_raw_data\n",
"\n",
"for i in non_food_raw_data:\n",
" if i[0] not in food_rectify:\n",
" non_food_data.append(i)\n",
" else:\n",
" food_data.append(i)\n",
"\n",
"food_data = shuffle(food_data)\n",
"non_food_data = shuffle(non_food_data)\n",
"\n",
"ratio = 0.75 \n",
"train_food_len = int(len(food_data) * ratio)\n",
"train_non_food_len = train_food_len\n",
"\n",
"test_food_len = len(food_data) - train_food_len\n",
"test_non_food_len = int(len(non_food_data) * (1 - ratio))\n",
"\n",
"\n",
"train_food = food_data[0:train_food_len]\n",
"test_food = food_data[train_food_len:train_food_len + test_food_len]\n",
"\n",
"train_non_food = non_food_data[0:train_non_food_len]\n",
"test_non_food = non_food_data[train_non_food_len:train_non_food_len + test_non_food_len]\n",
"\n",
"with open('train_food.csv', 'w') as f:\n",
" write = csv.writer(f)\n",
" write.writerows(train_food)\n",
" \n",
"with open('train_non_food.csv', 'w') as f:\n",
" write = csv.writer(f)\n",
" write.writerows(train_non_food )\n",
"\n",
"with open('test_food.csv', 'w') as f:\n",
" write = csv.writer(f)\n",
" write.writerows(test_food )\n",
"with open('test_non_food.csv', 'w') as f:\n",
" write = csv.writer(f)\n",
" write.writerows(test_non_food)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"a = [i for i in range(10)]\n",
"print(a)\n",
"print(a[0:4])\n",
"print(a[4:7])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"non_food_raw_data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}