def _load_average(kind):
    """Return the pickled ranking frame for ``kind`` ("activity" or "social").

    An empty frame with ``name`` and ``score`` columns is written to
    ``output/<kind>_average.pkl`` first when that file does not exist yet.
    """
    path = "output/%s_average.pkl" % kind
    if not os.path.isfile(path):
        # The pickle cannot be written unless the directory exists.
        os.makedirs("output", exist_ok=True)
        pd.DataFrame(data={'name': [], 'score': []}).to_pickle(path)
    return pd.read_pickle(path)


def _save_average(instance, owner, repository, score, kind):
    """Add or refresh one repository's score, then persist the sorted ranking.

    Writes ``output/<kind>_average.pkl`` and ``result/<kind>_rank.csv``.
    """
    row = pd.DataFrame([{"owner": owner, "name": repository, "score": score}])
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # replacement. Empty frames are skipped so they do not influence dtypes.
    frames = [frame for frame in (instance, row) if not frame.empty]
    ranking = pd.concat(frames, ignore_index=True)
    # keep="last": when a repository is analysed again, the fresh score must
    # win over the stale one (the default keep="first" kept the old score).
    ranking = ranking.drop_duplicates(subset=["owner", "name"], keep="last")
    ranking = ranking.sort_values(["score"], ascending=False)

    os.makedirs("output", exist_ok=True)
    os.makedirs("result", exist_ok=True)
    ranking.to_pickle("output/%s_average.pkl" % kind)
    ranking.to_csv("result/%s_rank.csv" % kind)


def get_activity_average_instance():
    """Return the activity ranking DataFrame, creating an empty one if absent."""
    return _load_average("activity")


def get_social_average_instance():
    """Return the social ranking DataFrame, creating an empty one if absent."""
    return _load_average("social")


def set_activity_average(instance, owner, repository, score):
    """Record a repository's activity score and rewrite the ranking files.

    Args:
        instance: Ranking frame as returned by get_activity_average_instance().
        owner: Repository owner; part of the de-duplication key.
        repository: Repository name; part of the de-duplication key.
        score: Score to store for this repository.
    """
    _save_average(instance, owner, repository, score, "activity")


def set_social_average(instance, owner, repository, score):
    """Record a repository's social score and rewrite the ranking files.

    Args:
        instance: Ranking frame as returned by get_social_average_instance().
        owner: Repository owner; part of the de-duplication key.
        repository: Repository name; part of the de-duplication key.
        score: Score to store for this repository.
    """
    _save_average(instance, owner, repository, score, "social")


def generate_social_line_number(start_time, end_time, top_number):
    """Draw a line chart of the weekly social score of the top repositories.

    Reads ``output/social_average.pkl`` for the ranking and one
    ``output/social_<name>.pkl`` per charted repository, then saves the
    figure to ``result/social_line.png``.

    Args:
        start_time: Start of the weekly date range.
        end_time: End of the weekly date range.
        top_number: Maximum number of repositories (columns) to chart.
    """
    ranking = pd.read_pickle("output/social_average.pkl")
    weeks = pd.date_range(start=start_time, end=end_time, freq="W")
    chart_df = pd.DataFrame(data=[], index=weeks)

    for _, row in ranking.iterrows():
        if len(chart_df.columns) >= top_number:
            break
        scores = pd.read_pickle("output/social_%s.pkl" % row["name"])
        chart_df[row["name"]] = scores["score"]

    if len(chart_df.columns) == 0:
        # DataFrame.plot() raises when there is nothing numeric to draw.
        return

    os.makedirs("result", exist_ok=True)
    fig = chart_df.plot().get_figure()
    fig.savefig("result/social_line.png")
    plt.close(fig)


def clean_directory():
    """Remove the temporary ``output`` and the ``result`` directories."""
    shutil.rmtree('output', ignore_errors=True)
    shutil.rmtree('result', ignore_errors=True)
    click.echo("Workspace is empty now!")


def is_corp(email, config):
    """Return True when ``email`` contains the configured corporate keyword.

    Args:
        email: Author e-mail (or identifier) to classify; a missing or empty
            value is treated as non-corporate.
        config: Configuration mapping; ``config["corp"]["keyword"]`` is the
            substring searched for.
    """
    if not email:
        return False
    return config["corp"]["keyword"] in email
def _weekly_social_scores(commits, start_time, end_time, is_corp):
    """Build the cumulative weekly community-contribution table.

    Args:
        commits: Commit records convertible by ``pd.DataFrame``; the code
            reads the ``date``, ``author`` and ``times`` fields
            (NOTE(review): presumably a list of dicts from the crawler;
            confirm against the caller).
        start_time: Start of the weekly date range.
        end_time: End of the weekly date range.
        is_corp: Callable taking an author value and returning a truthy
            result for corporate contributors.

    Returns:
        DataFrame indexed by the weekly range with cumulative
        ``community_member`` and ``all_member`` totals and their ratio in
        ``score``. Weeks with no commits so far score 0.0.
    """
    weeks = pd.date_range(start=start_time, end=end_time, freq="W")

    frame = pd.DataFrame(commits)
    if not frame.empty:
        # Records carrying this placeholder have no usable timestamp.
        frame = frame[frame["date"] != "未标注时间"]

    if frame.empty:
        # No dated commits at all: every week contributes zero.
        community_weekly = pd.Series(0, index=weeks)
        all_weekly = pd.Series(0, index=weeks)
    else:
        frame = frame.assign(date=pd.to_datetime(frame["date"]))
        corp_mask = pd.Series(
            [bool(is_corp(author)) for author in frame["author"]],
            index=frame.index,
            dtype=bool,
        )

        def weekly_total(rows):
            # Sum per week, then align to the requested range; weeks without
            # commits (or outside the data) count as zero.
            summed = rows.set_index("date").resample("W")["times"].sum()
            return summed.reindex(weeks, fill_value=0)

        community_weekly = weekly_total(frame[~corp_mask])
        all_weekly = weekly_total(frame)

    social_df = pd.DataFrame({
        "community_member": community_weekly.values,
        "all_member": all_weekly.values,
    }, index=weeks).cumsum()

    # 0/0 occurs for every week before the first commit. The overall average
    # already treated those weeks as 0, so store 0.0 instead of NaN.
    ratio = social_df["community_member"] / social_df["all_member"]
    social_df["score"] = ratio.fillna(0.0)
    return social_df


def analyse_repo(owner, repository, data, config):
    """Compute and persist the community (social) score of one repository.

    The score of a week is the cumulative share of commits made by
    non-corporate authors; the repository score is the mean over all weeks
    of the configured time range. Results are stored through the helpers
    module (per-repository pickle and CSV, ranking, line chart).

    Args:
        owner: Repository owner.
        repository: Repository name; used in the output file names.
        data: Crawled data; only ``data["commitArray"]`` is read here.
        config: Configuration mapping; reads ``config["time"]``,
            ``config["rank"]["top"]`` and whatever ``helpers.is_corp`` needs.
    """
    click.echo("分析社区化")

    start_time = config["time"]["start_time"]
    end_time = config["time"]["end_time"]

    social_df = _weekly_social_scores(
        data["commitArray"],
        start_time,
        end_time,
        lambda author: helpers.is_corp(author, config),
    )

    # Guard against an empty date range, which has no weeks to average.
    target_social_score = social_df["score"].mean() if len(social_df) else 0.0

    instance = helpers.get_social_average_instance()
    helpers.series_to_pickle(social_df, "social_%s" % repository)
    helpers.set_social_average(instance, owner, repository, target_social_score)
    helpers.export_csv(social_df, "social_%s" % repository)
    helpers.generate_social_line_number(start_time, end_time, int(config["rank"]["top"]))

    # Report the score without terminating the process: the former debug
    # exit() aborted the CLI after the first repository.
    click.echo(target_social_score)