Skip to content

Commit

Permalink
add flowchart
Browse files Browse the repository at this point in the history
  • Loading branch information
ChenglongChen committed Jul 12, 2015
1 parent a066248 commit 129c5cd
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 128 deletions.
6 changes: 3 additions & 3 deletions Code/Feat/run_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@
cmd = "python ./combine_feat_[LSA_and_stats_feat_Jun09]_[Low].py"
os.system(cmd)

# #### combine feat
# cmd = "python ./combine_feat_[LSA_svd150_and_Jaccard_coef_Jun14]_[Low].py"
# os.system(cmd)
#### combine feat
cmd = "python ./combine_feat_[LSA_svd150_and_Jaccard_coef_Jun14]_[Low].py"
os.system(cmd)

#### combine feat
cmd = "python ./combine_feat_[svd100_and_bow_Jun23]_[Low].py"
Expand Down
Binary file added Doc/FlowChart.pdf
Binary file not shown.
Binary file added Doc/FlowChart.pptx
Binary file not shown.
90 changes: 46 additions & 44 deletions Doc/Kaggle_CrowdFlower_ChenglongChen.aux
Original file line number Diff line number Diff line change
Expand Up @@ -15,87 +15,89 @@
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Summary}{4}{section.1}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Preprocessing}{4}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Dropping HTML tags}{4}{subsection.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Word Replacement}{4}{subsection.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1}Spelling Correction}{4}{subsubsection.2.2.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The flowchart of our method.}}{4}{figure.1}}
\newlabel{fig:Flowchart}{{1}{4}{The flowchart of our method}{figure.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Preprocessing}{5}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Dropping HTML tags}{5}{subsection.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Word Replacement}{5}{subsection.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1}Spelling Correction}{5}{subsubsection.2.2.1}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Spelling Correction}}{5}{table.1}}
\newlabel{tab:spelling_correction}{{1}{5}{Spelling Correction\relax }{table.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.2}Synonym Replacement}{5}{subsubsection.2.2.2}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Synonym Replacement}}{5}{table.2}}
\newlabel{tab:synonym}{{2}{5}{Synonym Replacement\relax }{table.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.3}Other Replacements}{5}{subsubsection.2.2.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Stemming}{5}{subsection.2.3}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Feature Extraction/Selection}{5}{section.3}}
\citation{owen}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Synonym Replacement}}{6}{table.2}}
\newlabel{tab:synonym}{{2}{6}{Synonym Replacement\relax }{table.2}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Other Replacement}}{6}{table.3}}
\newlabel{tab:Other}{{3}{6}{Other Replacement\relax }{table.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Stemming}{6}{subsection.2.3}}
\citation{owen}
\@writefile{toc}{\contentsline {section}{\numberline {3}Feature Extraction/Selection}{7}{section.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Counting Features}{7}{subsection.3.1}}
\newlabel{subsec:Counting_Features}{{3.1}{7}{Counting Features\relax }{subsection.3.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Basic Counting Features}{7}{subsubsection.3.1.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Intersect Counting Features}{7}{subsubsection.3.1.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Intersect Position Features}{7}{subsubsection.3.1.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Distance Features}{7}{subsection.3.2}}
\newlabel{subsec:Distance_Features}{{3.2}{7}{Distance Features\relax }{subsection.3.2}{}}
\citation{Otto_1st}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Intersect Position Features}{8}{subsubsection.3.1.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Distance Features}{8}{subsection.3.2}}
\newlabel{subsec:Distance_Features}{{3.2}{8}{Distance Features\relax }{subsection.3.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}Basic Distance Features}{8}{subsubsection.3.2.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}Statistical Distance Features}{8}{subsubsection.3.2.2}}
\newlabel{subsubsec:Statistical_Distance_Features}{{3.2.2}{8}{Statistical Distance Features\relax }{subsubsection.3.2.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}TF-IDF Based Features}{9}{subsection.3.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Basic TF-IDF Features}{9}{subsubsection.3.3.1}}
\newlabel{subsubsec:Basic_TFIDF_Features}{{3.3.1}{9}{Basic TF-IDF Features\relax }{subsubsection.3.3.1}{}}
\citation{malware_2nd}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.2}Cooccurrence TF-IDF Features}{10}{subsubsection.3.3.2}}
\newlabel{subsubsec:Cooccurrence_TFIDF_Features}{{3.3.2}{10}{Cooccurrence TF-IDF Features\relax }{subsubsection.3.3.2}{}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces One sample in \texttt {train.csv}}}{10}{table.4}}
\newlabel{tab:sample_id54}{{4}{10}{One sample in \texttt {train.csv}\relax }{table.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Other Features}{10}{subsection.3.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}Query Id}{10}{subsubsection.3.4.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Feature Selection}{10}{subsection.3.5}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces CV score and LB score}}{11}{table.5}}
\newlabel{CV_LB}{{5}{11}{CV score and LB score\relax }{table.5}{}}
\citation{malware_2nd}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces One sample in \texttt {train.csv}}}{11}{table.4}}
\newlabel{tab:sample_id54}{{4}{11}{One sample in \texttt {train.csv}\relax }{table.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Other Features}{11}{subsection.3.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}Query Id}{11}{subsubsection.3.4.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Feature Selection}{11}{subsection.3.5}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Modeling Techniques and Training}{11}{section.4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Cross Validation Methodology}{11}{subsection.4.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.1}The Split}{11}{subsubsection.4.1.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.2}Following the Same Logic}{11}{subsubsection.4.1.2}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces CV score and LB score}}{12}{table.5}}
\newlabel{CV_LB}{{5}{12}{CV score and LB score\relax }{table.5}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.2}Following the Same Logic}{12}{subsubsection.4.1.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Model Objective and Decoding Method}{12}{subsection.4.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.1}Classification}{12}{subsubsection.4.2.1}}
\newlabel{subsubsec:Classification}{{4.2.1}{12}{Classification\relax }{subsubsection.4.2.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.2}Regression}{12}{subsubsection.4.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.1}Classification}{13}{subsubsection.4.2.1}}
\newlabel{subsubsec:Classification}{{4.2.1}{13}{Classification\relax }{subsubsection.4.2.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.2}Regression}{13}{subsubsection.4.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.3}Pairwise Ranking}{13}{subsubsection.4.2.3}}
\citation{ebc}
\citation{cocr}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.3}Pairwise Ranking}{13}{subsubsection.4.2.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.4}Oridinal Regression}{13}{subsubsection.4.2.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.5}Softkappa}{13}{subsubsection.4.2.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Sample Weighting}{13}{subsection.4.3}}
\citation{ensemble_selection}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.4}Oridinal Regression}{14}{subsubsection.4.2.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.5}Softkappa}{14}{subsubsection.4.2.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Sample Weighting}{14}{subsection.4.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Ensemble Selection}{14}{subsection.4.4}}
\citation{hyperopt}
\citation{hyperopt_url}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Ensemble Selection}{14}{subsection.4.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.1}Model Library Building via Guided Parameter Searching}{14}{subsubsection.4.4.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2}Model Weight Optimization}{14}{subsubsection.4.4.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.3}Randomized Ensemble Selection}{14}{subsubsection.4.4.3}}
\citation{NLTK_Cookbook}
\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Model Library}}{15}{table.6}}
\newlabel{tab:Model_Library}{{6}{15}{Model Library\relax }{table.6}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Code Description}{15}{section.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Setting}{15}{subsection.5.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Feature}{15}{subsection.5.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces CV mean, Public LB, and Private LB scores of our 35 best Public LB submissions generating with randomized ensemble selection. One standard deviation of the CV score is plotted via error bar.}}{16}{figure.1}}
\newlabel{fig:CV_Public_Private}{{1}{16}{CV mean, Public LB, and Private LB scores of our 35 best Public LB submissions generating with randomized ensemble selection. One standard deviation of the CV score is plotted via error bar}{figure.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Model}{17}{subsection.5.3}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Dependencies}{18}{section.6}}
\@writefile{toc}{\contentsline {section}{\numberline {7}How To Generate the Solution (aka README file)}{18}{section.7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.1}Model Library Building via Guided Parameter Searching}{15}{subsubsection.4.4.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces CV mean, Public LB, and Private LB scores of our 35 best Public LB submissions generating with randomized ensemble selection. One standard deviation of the CV score is plotted via error bar.}}{16}{figure.2}}
\newlabel{fig:CV_Public_Private}{{2}{16}{CV mean, Public LB, and Private LB scores of our 35 best Public LB submissions generating with randomized ensemble selection. One standard deviation of the CV score is plotted via error bar}{figure.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2}Model Weight Optimization}{16}{subsubsection.4.4.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.3}Randomized Ensemble Selection}{16}{subsubsection.4.4.3}}
\citation{NLTK_Cookbook}
\@writefile{toc}{\contentsline {section}{\numberline {5}Code Description}{17}{section.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Setting}{17}{subsection.5.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Feature}{17}{subsection.5.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Model}{18}{subsection.5.3}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Dependencies}{19}{section.6}}
\@writefile{toc}{\contentsline {section}{\numberline {7}How To Generate the Solution (aka README file)}{19}{section.7}}
\bibstyle{plain}
\bibdata{reference}
\bibcite{owen}{1}
\@writefile{toc}{\contentsline {section}{\numberline {8}Additional Comments and Observations}{19}{section.8}}
\@writefile{toc}{\contentsline {section}{\numberline {9}Simple Features and Methods}{19}{section.9}}
\@writefile{toc}{\contentsline {section}{\numberline {10}Acknowledgement}{19}{section.10}}
\bibcite{Otto_1st}{2}
\bibcite{malware_2nd}{3}
\bibcite{hyperopt_url}{4}
\bibcite{hyperopt}{5}
\@writefile{toc}{\contentsline {section}{\numberline {8}Additional Comments and Observations}{20}{section.8}}
\@writefile{toc}{\contentsline {section}{\numberline {9}Simple Features and Methods}{20}{section.9}}
\@writefile{toc}{\contentsline {section}{\numberline {10}Acknowledgement}{20}{section.10}}
\bibcite{ebc}{6}
\bibcite{ensemble_selection}{7}
\bibcite{NLTK_Cookbook}{8}
Expand Down
Loading

0 comments on commit 129c5cd

Please sign in to comment.