{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# cd/gt03/Summary Table\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To import the module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from mtbp3cd.util.gt03summary import crosstab_from_lists"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example - 1 Generate data summary tables\n",
    "\n",
    "The output includes:\n",
    "\n",
    "1. Count of rows of input data by output-col-columns and output-row-columns\n",
    "2. Normalized counts by perct_within_index\n",
    "3. Report with format \"count (%)\" \n",
    "\n",
    "To create a dataset:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {\n",
    "    'A': ['foo','foo', 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'baz', 'baz', 'baz'],\n",
    "    'B': ['one','one','one','one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
    "    'C': ['y','x','x','x', 'y', 'x', 'y', 'x', 'y', 'x', 'y'],\n",
    "    'D': ['apple','apple','apple','apple', 'banana', 'apple', 'banana', 'apple', 'banana', 'apple', 'banana'],\n",
    "    'E': ['red','blue','red','red', 'red', 'blue', 'blue', 'red', 'blue', 'red', 'blue'],\n",
    "    'value': [0,1,1,1, 2, 3, 4, 5, 6, 7, 8]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Use crosstab_from_lists with a 3-level row index (A, B, C) and a 2-level column index (D, E)\n",
    "rows = ['A', 'B', 'C']\n",
    "cols = ['D', 'E']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "To create a frequency (count) table:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols))['count']\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with row-wise percentage:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, rows, col_margin_perct=True))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "To create a table with grouped row-wise percentage:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, ['A','B'], col_margin_perct=True))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with column-wise percentage:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, cols, row_margin_perct=True))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with grouped column-wise percentage:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, ['D'], row_margin_perct=True))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with grouped row-and-column-wise percentage:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, ['A', 'D']))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with grouped row-and-column-wise percentage, with both `row_margin_perct=True` and `col_margin_perct=True`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, ['A', 'D'], row_margin_perct=True, col_margin_perct=True))['report']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a table with grouped row-wise percentage and total:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(crosstab_from_lists(df, rows, cols, ['A','B'], col_margin_perct=True, report_type=2))['report']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}