"""Small pandas exercises: NCAA scoring, Fortune 500 revenue per employee, sets.

The analysis steps are plain functions that take a DataFrame, so they can be
reused and checked without network access; ``main()`` downloads the course
CSV files and prints the answers.
"""
import pandas

NCAA_URL = "http://euclid.nmu.edu/~rappleto/Classes/CS295.Python/ncaa.csv"
FORTUNE_URL = (
    "http://euclid.nmu.edu/~rappleto/Classes/CS295.Python/fortune500-small.csv"
)


# (5 HARD) Which team is highest scoring (per game or total)?
def highest_scoring_team(games):
    """Return ``(team_name, average_score)`` for the best per-game scorer.

    Args:
        games: DataFrame with ``team_name`` and ``team_score`` columns,
            one row per game.

    Returns:
        A ``(name, mean team_score)`` tuple. When several teams tie for the
        best average, the one whose first game appears latest in ``games``
        is returned.

    Raises:
        ValueError: if ``games`` has no rows to rank.
    """
    # sort=False keeps teams in first-appearance order, so the stable sort
    # below resolves ties deterministically.
    averages = games.groupby("team_name", sort=False)["team_score"].mean()
    if averages.empty:
        raise ValueError("no games to rank")
    ranked = sorted(averages.items(), key=lambda pair: pair[1])
    return ranked[-1]


# (2 HARD) Which companies have more revenue per employee,
# "Aerospace and Defense" or "Technology"?
def revenue_per_employee(companies):
    """Return ``{sector: revenue per employee}`` rounded to whole units.

    Args:
        companies: DataFrame with ``Sector``, ``Revenues`` and ``Employees``
            columns, one row per company.

    Returns:
        Dict mapping each sector to
        ``round(total Revenues / total Employees * 1000000, 0)``.
    """
    totals = companies.groupby("Sector", sort=False)[
        ["Revenues", "Employees"]
    ].sum()
    # NOTE(review): the 1,000,000 factor presumably converts revenues quoted
    # in millions of dollars into dollars -- confirm against the dataset.
    ratio = totals["Revenues"] / totals["Employees"] * 1000000
    return ratio.round(0).to_dict()


def plot_revenue_per_employee(ans):
    """Draw a horizontal bar chart with one bar per sector in ``ans``."""
    # Imported here so the analysis helpers work without matplotlib installed.
    import matplotlib.pyplot as plt

    plt.barh(list(ans.keys()), list(ans.values()))
    return plt


def set_operations_demo():
    """Return ``(union, difference)`` of the two example sets.

    Sets support ``union`` and ``-`` (difference). They do not support
    repetition, concatenation or indexing: ``s1 * 3``, ``s1 + s2`` and
    ``s1[2]`` each raise ``TypeError``, so they are not executed here.
    """
    s1 = {1, 2, 3}
    s2 = {2, 3, 4}
    return s1.union(s2), s1 - s2


def main():
    """Download both datasets and print the answer to each exercise."""
    games = pandas.read_csv(NCAA_URL)
    print(highest_scoring_team(games))

    df = pandas.read_csv(FORTUNE_URL)
    ans = revenue_per_employee(df)
    print(ans)
    print(sorted(ans.items(), key=lambda x: -x[1]))
    plot_revenue_per_employee(ans).show()

    print(set(df["Sector"]))
    print(df.groupby("Sector").sum())

    print(set_operations_demo())


if __name__ == "__main__":
    main()