py: activity timestamp analysis plot
haxscramper committed Mar 14, 2024
1 parent 4c6f258 commit 4ffa2eb
Showing 7 changed files with 185 additions and 16 deletions.
116 changes: 114 additions & 2 deletions scripts/py_cli/py_cli/scratch_scripts/activity_analysis.py
@@ -2,15 +2,26 @@

import py_exporters.export_sqlite as sql
from py_cli.haxorg_cli import *
from beartype.typing import List, Tuple
from beartype.typing import List, Tuple, Any
from beartype import beartype
from py_scriptutils.files import IsNewInput
from sqlalchemy import create_engine, Engine
from py_scriptutils.script_logging import log

from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import select, literal, union_all
from sqlalchemy.orm import sessionmaker
import matplotlib.figure as matplotlib_figure
from py_scriptutils.pandas_utils import dataframe_to_rich_table
from py_scriptutils.sqlalchemy_utils import format_rich_query
from py_scriptutils.rich_utils import render_rich
import matplotlib.dates as mdates
import matplotlib.ticker as mticker

CAT = "example.activity_analysis"


class ActivityAnalysisOptions(BaseModel):
    infile: List[Path]
    outdir: Path
@@ -25,6 +36,91 @@ def analysis_options(f):
    return apply_options(f, options_from_model(ActivityAnalysisOptions))


@beartype
def plot_timestamped_events_with_pandas(
    engine: Engine,
    min_time: Optional[datetime] = None,
    max_time: Optional[datetime] = None,
    bin_size: int = 5,
    figsize: Tuple[int, int] = (10, 6),
    time_tick_skip: int = 5,
) -> Optional[Tuple[matplotlib_figure.Figure, Any]]:
    """Collect every timestamped event from the database, bin the timestamps
    into `bin_size`-day buckets and render them as a stacked area chart.
    Returns the matplotlib figure and axes."""
    session = sessionmaker(bind=engine)()

    # log(CAT).info(
    #     render_rich(
    #         format_rich_query(engine, select(sql.Block.timestamp, literal("Block")))))

    # Union of all event tables, normalized to (timestamp, event) rows.
    union_query = union_all(
        select(
            sql.PriorityModified.timestamp.label("timestamp"),
            literal("PriorityModified").label("event"),
        ),
        select(sql.StateModified.timestamp, literal("StateModified")),
        select(sql.TagModified.timestamp, literal("TagModified")),
        select(sql.ClockModified.from_.label("timestamp"), literal("ClockModified")),
        select(sql.NoteModified.timestamp, literal("NoteModified")),
        select(sql.Block.timestamp, literal("Block")),
        select(sql.Subtree.created.label("timestamp"), literal("SubtreeCreated")),
        select(sql.Subtree.scheduled, literal("SubtreeScheduled")),
        select(sql.Subtree.deadline, literal("SubtreeDeadline")),
        select(
            sql.Subtree.closed,
            literal("SubtreeClosed"),
        ),
    ).alias("union_query")

    query = select(union_query.c.timestamp,
                   union_query.c.event).where(union_query.c.timestamp.is_not(None))

    df = pd.read_sql(query, engine)

    if min_time is None or max_time is None:
        min_time = df["timestamp"].min() if min_time is None else min_time
        max_time = df["timestamp"].max() if max_time is None else max_time

    # Snap the range to whole days so the bins start at midnight.
    min_time = min_time.replace(hour=0, minute=0, second=0, microsecond=0)
    max_time = (max_time + timedelta(days=1)).replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )

    bins = pd.date_range(min_time, max_time, freq=f"{bin_size}D")

    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df = df[(min_time <= df["timestamp"]) & (df["timestamp"] <= max_time)]

    df["bin"] = pd.cut(df["timestamp"], bins=bins, right=False)

    grouped = df.groupby(["bin", "event"]).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=figsize)
    grouped.plot.area(stacked=True, ax=ax)

    ax.set_xlabel("Time")
    ax.set_ylabel("Number of Events")
    ax.xaxis_date()
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1))

    # Show every `time_tick_skip`-th bin boundary as a tick label.
    grouped.index = pd.to_datetime(grouped.index.map(lambda it: it.left))
    ticklabels = [""] * len(grouped.index)
    ticklabels[::time_tick_skip] = [
        item.strftime("%Y-%m-%d") for item in grouped.index[::time_tick_skip]
    ]

    ax.xaxis.set_major_formatter(mticker.FixedFormatter(ticklabels))
    ax.grid(True)
    ax.grid(color='gray', linestyle='--', linewidth=0.5)

    fig.autofmt_xdate()
    fig.tight_layout()

    session.close()
    return (fig, ax)


@click.command()
@click.option("--config",
type=click.Path(exists=True),
@@ -37,6 +133,9 @@ def cli(ctx: click.Context, config: str, **kwargs) -> None:
    opts: ActivityAnalysisOptions = ctx.obj["root"]
    sql_db = opts.db_path if opts.db_path else opts.outdir.joinpath("db.sqlite")

    if not opts.outdir.exists():
        opts.outdir.mkdir(parents=True)

    if opts.force_db or IsNewInput(input_path=opts.infile, output_path=[sql_db]):
        nodes: List[Tuple[org.Org, str]] = []
        for file in opts.infile:
@@ -54,3 +153,16 @@ def cli(ctx: click.Context, config: str, **kwargs) -> None:
    else:
        engine: Engine = create_engine("sqlite:///" + str(sql_db))
        log(CAT).info("No DB update needed")

    log(CAT).info("Plotting data")
    plot = plot_timestamped_events_with_pandas(
        engine,
        bin_size=25,
        min_time=datetime(year=2017, month=1, day=1),
        figsize=(20, 12),
        time_tick_skip=1,
    )

    if plot:
        fig, ax = plot
        fig.savefig(opts.outdir.joinpath("event_distribution.png"))
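
For reference, the new plotting helper can also be driven outside of the click entry point once a database has been exported. A minimal sketch, assuming the import path matches the repository layout and using a hypothetical database location:

from datetime import datetime
from sqlalchemy import create_engine
from py_cli.scratch_scripts.activity_analysis import plot_timestamped_events_with_pandas

# Hypothetical path to a db.sqlite produced by a previous CLI run.
engine = create_engine("sqlite:///" + "/tmp/activity_analysis/db.sqlite")

plot = plot_timestamped_events_with_pandas(
    engine,
    bin_size=10,  # 10-day bins instead of the 5-day default
    min_time=datetime(year=2020, month=1, day=1),
)

if plot:
    fig, ax = plot
    fig.savefig("/tmp/event_distribution.png")
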
16 changes: 8 additions & 8 deletions scripts/py_exporters/py_exporters/export_sqlite.py
@@ -42,7 +42,7 @@ class Subtree(Base):
    scheduled = DateTimeColumn(nullable=True)
    deadline = DateTimeColumn(nullable=True)
    closed = DateTimeColumn(nullable=True)
    location = ForeignId(name="Location.id")
    location = ForeignId(name="Location.id", nullable=True)


class BlockKind(enum.Enum):
@@ -60,7 +60,7 @@ class Block(Base):
    timestamp = DateTimeColumn(nullable=True)
    parent = ForeignId(name="Block.id", nullable=True)
    wordcount = IntColumn(nullable=True)
    location = ForeignId(name="Location.id")
    location = ForeignId(name="Location.id", nullable=True)


class ValueEditOperation(enum.Enum):
@@ -140,7 +140,10 @@ def registerDocument(node: org.Org, engine: Engine, file: str):

    counter = 0

    def get_location(node: org.Org) -> int:
    def get_location(node: org.Org) -> Optional[int]:
        if not node.loc:
            return None

        nonlocal counter
        result = file_record.id * 1E6 + counter
        counter += 1
@@ -257,11 +260,8 @@ def getTime(kind: org.SubtreePeriodKind) -> Optional[datetime]:
        for time in node.getTimePeriods(
                org.IntSetOfSubtreePeriodKindIntVec([kind])):

            # if time.kind == org.SubtreePeriodKind.
            # log(CAT).info(org.treeRepr(node.title))
            if time.getTime().getTimeKind() == org.TimeTimeKind.Static:
                result = evalDateTime(time.getTime().getStatic().time)
            if time.from_.getTimeKind() == org.TimeTimeKind.Static:
                result = evalDateTime(time.from_.getStatic().time)

        return result
29 changes: 29 additions & 0 deletions scripts/py_scriptutils/py_scriptutils/pandas_utils.py
@@ -0,0 +1,29 @@
import pandas as pd
from rich.table import Table
from beartype.typing import List
from beartype import beartype


@beartype
def dataframe_to_rich_table(df: pd.DataFrame, exclude_columns: List[str] = []) -> Table:
    table = Table(show_header=True, header_style="bold magenta")
    add_index = "index" not in exclude_columns
    if add_index:
        table.add_column("index")

    for column in df.columns:
        if column not in exclude_columns:
            table.add_column(column)

    for index, row in df.iterrows():
        row_content = []
        if add_index:
            row_content.append(str(index))

        for col in df.columns:
            if col not in exclude_columns:
                row_content.append(str(row[col]))

        table.add_row(*row_content)

    return table
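
A quick sketch of how dataframe_to_rich_table might be used, together with the render_rich helper added in the next file; the sample frame is made up purely for illustration:

import pandas as pd
from py_scriptutils.pandas_utils import dataframe_to_rich_table
from py_scriptutils.rich_utils import render_rich

# Made-up event counts, only to show the table rendering.
df = pd.DataFrame({"event": ["Block", "StateModified"], "count": [3, 7]})
print(render_rich(dataframe_to_rich_table(df, exclude_columns=["index"])))
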
9 changes: 9 additions & 0 deletions scripts/py_scriptutils/py_scriptutils/rich_utils.py
@@ -0,0 +1,9 @@
from rich.console import Console


def render_rich(it) -> str:
    console = Console()
    with console.capture() as capture:
        console.print(it)

    return capture.get()
21 changes: 20 additions & 1 deletion scripts/py_scriptutils/py_scriptutils/sqlalchemy_utils.py
@@ -1,6 +1,6 @@
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy import create_engine, MetaData, Table as SATable, Engine, inspect
from sqlalchemy.sql import select
from sqlalchemy.sql import select, Executable
from rich.table import Table
from rich.console import Console
from beartype.typing import Optional, List
@@ -60,6 +60,25 @@ def format_rich_table(engine: Engine,
    return rich_table


@beartype
def format_rich_query(
    engine: Engine,
    query: Executable,
    column_labels: List[str] = [],
) -> Table:

    rich_table = Table(show_header=True, header_style="bold blue")
    with engine.connect() as connection:
        result = connection.execute(query)
        if not column_labels:
            column_labels = result.keys()
        for label in column_labels:
            rich_table.add_column(label)
        for row in result:
            rich_table.add_row(*[str(item) for item in row])
    return rich_table


@beartype
def get_table_names(engine: Engine, excluded_tables: List[str] = []) -> List[str]:
"""
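
For reference, a minimal sketch of how the new format_rich_query helper could be exercised against an in-memory SQLite database; the Event model and its rows are hypothetical and exist only for this example:

from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

from py_scriptutils.rich_utils import render_rich
from py_scriptutils.sqlalchemy_utils import format_rich_query

Base = declarative_base()


class Event(Base):
    # Hypothetical table used only for this illustration.
    __tablename__ = "event"
    id = Column(Integer, primary_key=True)
    name = Column(String)


engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(Event(name="Block"))
    session.commit()

# Render the query result as a rich table and print the captured text.
print(render_rich(format_rich_query(engine, select(Event.id, Event.name))))
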
3 changes: 0 additions & 3 deletions src/sem/SemConvert.cpp
@@ -1000,8 +1000,6 @@ SemId<LatexBody> OrgConverter::convertMath(__args) {

SemId<Include> OrgConverter::convertInclude(__args) {
    SemId<Include> include = Sem<Include>(a);
    LOG(WARNING) << "TODO implement include";

    return include;
}

@@ -1295,7 +1293,6 @@ SemId<Document> OrgConverter::toDocument(OrgAdapter adapter) {
        auto __trace = trace(adapter, fmt1(sub.getKind()));
        switch (sub.kind()) {
            case org::Columns: {
                LOG(WARNING) << "TODO: Skipping 'columns' node";
                break;
            }
            case org::CommandTitle: {
7 changes: 5 additions & 2 deletions tests/python/test_example_scripts.py
@@ -86,6 +86,9 @@ def test_base_activity_analysis():
        f"--outdir={dir}",
    ])

    if result.exception:
        raise result.exception

    assert result.exit_code == 0, result.output
    assert db_file.exists()
    engine = open_sqlite(db_file)
@@ -141,11 +144,11 @@ def test_activity_notes_collection():

    runner = CliRunner()
    opts = [
        *[f"--infile={it}" for it in dir.glob("*.org")][:3],
        *[f"--infile={it}" for it in dir.glob("*.org")],
        f"--db_path={db_file}",
        # "--cachedir=/tmp/activity_cache",
        "--outdir=/tmp/activity_analysis",
        "--force_db=True",
        # "--force_db=True",
    ]

    result = runner.invoke(activity_analysis.cli, opts)
