# df_nypd = read_csv("https://www.dropbox.com/scl/fi/kf2zk4t1onxzm2vo3lpkq/NYPD_Complaint_Data_Historic.csv?rlkey=ly36vi9v66sno80eir6rohlwn&dl=1", na = "(null)")
# Load the NYPD complaint export, treating the literal "(null)" as missing.
# Column names are normalised once, then the complaint start date is parsed
# from month/day/year text and its calendar year is stored in `year`.
df_nypd <- read_csv("data/NYPD_Complaint_Data_Historic.csv", na = "(null)") |>
  janitor::clean_names() |>
  mutate(
    cmplnt_fr_dt = lubridate::mdy(cmplnt_fr_dt),
    year = lubridate::year(cmplnt_fr_dt)
  )

Top 10 Crime Types

All Crime Types

Since the dataset encompasses a total of 69 crime types, displaying all of them in a single visualization would be cluttered and hard to read. To enhance clarity and focus, we limit our visualizations to the 10 most frequent crime types, which together account for the large majority of complaints in our dataset.

# Frequency table of every offense description, ordered from most to least
# common, with a running rank column appended for reference.
df_nypd |>
  count(ofns_desc, name = "count", sort = TRUE) |>
  mutate(Rank = seq_len(n())) |>
  rename(
    "Crime Type" = ofns_desc,
    "Number of Cases" = count
  ) |>
  kable()
Crime Type Number of Cases Rank
PETIT LARCENY 541926 1
HARRASSMENT 2 432087 2
ASSAULT 3 & RELATED OFFENSES 303502 3
CRIMINAL MISCHIEF & RELATED OF 279340 4
GRAND LARCENY 256120 5
FELONY ASSAULT 131263 6
OFF. AGNST PUB ORD SENSBLTY & 112260 7
ROBBERY 84824 8
MISCELLANEOUS PENAL LAW 83509 9
BURGLARY 78949 10
DANGEROUS DRUGS 75042 11
GRAND LARCENY OF MOTOR VEHICLE 49605 12
VEHICLE AND TRAFFIC LAWS 47947 13
OFFENSES AGAINST PUBLIC ADMINI 40789 14
DANGEROUS WEAPONS 39984 15
SEX CRIMES 38271 16
FORGERY 27503 17
INTOXICATED & IMPAIRED DRIVING 22812 18
THEFT-FRAUD 20660 19
CRIMINAL TRESPASS 16189 20
FRAUDS 12988 21
OFFENSES INVOLVING FRAUD 9874 22
POSSESSION OF STOLEN PROPERTY 9016 23
UNAUTHORIZED USE OF A VEHICLE 8994 24
RAPE 8338 25
OFFENSES AGAINST THE PERSON 6816 26
OTHER OFFENSES RELATED TO THEF 6122 27
ADMINISTRATIVE CODE 5835 28
ARSON 4225 29
NYS LAWS-UNCLASSIFIED FELONY 3461 30
OTHER STATE LAWS (NON PENAL LA 1896 31
MURDER & NON-NEGL. MANSLAUGHTER 1738 32
BURGLAR’S TOOLS 1687 33
THEFT OF SERVICES 1510 34
GAMBLING 1076 35
AGRICULTURE & MRKTS LAW-UNCLASSIFIED 728 36
KIDNAPPING & RELATED OFFENSES 704 37
FRAUDULENT ACCOSTING 701 38
PETIT LARCENY OF MOTOR VEHICLE 688 39
ALCOHOLIC BEVERAGE CONTROL LAW 543 40
PROSTITUTION & RELATED OFFENSES 512 41
OFFENSES AGAINST PUBLIC SAFETY 382 42
OFFENSES RELATED TO CHILDREN 262 43
DISORDERLY CONDUCT 168 44
ENDAN WELFARE INCOMP 145 45
OTHER STATE LAWS 136 46
CHILD ABANDONMENT/NON SUPPORT 128 47
JOSTLING 123 48
CANNABIS RELATED OFFENSES 75 49
HOMICIDE-NEGLIGENT,UNCLASSIFIE 73 50
NA 71 51
KIDNAPPING 58 52
NYS LAWS-UNCLASSIFIED VIOLATION 56 53
ANTICIPATORY OFFENSES 51 54
ESCAPE 3 48 55
LOITERING/GAMBLING (CARDS, DIC 44 56
FELONY SEX CRIMES 40 57
HOMICIDE-NEGLIGENT-VEHICLE 28 58
UNLAWFUL POSS. WEAP. ON SCHOOL 27 59
NEW YORK CITY HEALTH CODE 22 60
INTOXICATED/IMPAIRED DRIVING 20 61
DISRUPTION OF A RELIGIOUS SERV 9 62
OTHER STATE LAWS (NON PENAL LAW) 6 63
ABORTION 2 64
LOITERING 2 65
OFFENSES AGAINST MARRIAGE UNCL 2 66
OTHER TRAFFIC INFRACTION 2 67
FORTUNE TELLING 1 68
KIDNAPPING AND RELATED OFFENSES 1 69

Top 10 Crime Types

# Step 1: Count complaints per offense key code (ky_cd), most frequent first.
df_summary <- df_nypd |>
  count(ky_cd, name = "count", sort = TRUE)

# Total number of complaints; used as the denominator of the cumulative share.
total_n <- sum(df_summary$count)

# Cutoff for the cumulative share, in percent of all complaints.
cutoff <- 80

# Step 2: Running share (in percent) of all complaints covered by the
# categories seen so far, in descending order of frequency.
df_summary <- df_summary |>
  mutate(cumulative_percentage = cumsum(count) / total_n * 100)

# Step 3: Number of leading categories needed to reach the cutoff. A row is
# kept while the share accumulated *before* it is still below the cutoff, so
# the category that crosses the cutoff is included. Unlike which.max() on a
# logical vector, this yields 0 for an empty table and keeps every row when
# the cutoff is never reached.
position <- sum(
  dplyr::lag(df_summary$cumulative_percentage, default = 0) < cutoff
)

# Select the top categories up to this position.
df_summary <- head(df_summary, position)
# Complaints restricted to the key codes retained in df_summary, with the
# columns needed by the plots below.
df_crime_patterns <- df_nypd |>
  filter(ky_cd %in% df_summary$ky_cd) |>
  mutate(
    # Give key codes 361 and 126 one fixed label each.
    # NOTE(review): presumably these codes appear under more than one
    # description in the raw data -- confirm against the source file.
    # if_else() keeps the column character even for zero-row input.
    ofns_desc = if_else(
      ky_cd == 361,
      "OFF. AGNST PUB ORD SENSBLTY & PLACE FALSE BOMB",
      ofns_desc
    ),
    ofns_desc = if_else(
      ky_cd == 126,
      "MISCELLANEOUS PENAL LAW",
      ofns_desc
    ),
    # Parse the complaint start time, then extract its hour component.
    cmplnt_fr_tm = lubridate::hms(cmplnt_fr_tm),
    hour = lubridate::hour(cmplnt_fr_tm)
  )
# Attach the offense description for each key code to the summary table,
# using the distinct (ky_cd, ofns_desc) pairs found in df_crime_patterns.
df_summary <- left_join(
  df_summary,
  distinct(df_crime_patterns, ky_cd, ofns_desc),
  by = "ky_cd"
)
# Render the summary table with reader-friendly column headers.
df_summary |>
  rename(all_of(c(
    "Crime Code" = "ky_cd",
    "Crime Type" = "ofns_desc",
    "Number of Cases" = "count",
    "Cumulative Percentage (%)" = "cumulative_percentage"
  ))) |>
  kable()
Crime Code Number of Cases Cumulative Percentage (%) Crime Type
341 541926 19.54989 PETIT LARCENY
578 432087 35.13735 HARRASSMENT 2
344 303502 46.08613 ASSAULT 3 & RELATED OFFENSES
109 256120 55.32562 GRAND LARCENY
351 210043 62.90288 CRIMINAL MISCHIEF & RELATED OF
106 131263 67.63817 FELONY ASSAULT
361 112266 71.68815 OFF. AGNST PUB ORD SENSBLTY & PLACE FALSE BOMB
105 84824 74.74816 ROBBERY
126 80377 77.64775 MISCELLANEOUS PENAL LAW
107 78949 80.49582 BURGLARY

The table above shows that the 10 most frequent crime categories together account for just over 80% of all crimes in NYC. Concentrating on these categories gives a focused overview of the primary crime types in the city.

# crime_counts = df_crime_patterns |>
#     group_by(year,ofns_desc) |>
#     summarize(count = n())
# 
# crime_counts |> 
#   write.csv("Top10CrimeMap/data/crime_counts.csv")
# df_summary |>
#   write.csv("Top10CrimeMap/data/df_summary.csv", col.names = TRUE)
# 
# set.seed(123)
# df_crime_patterns|>
#   sample_frac(0.30) |>
#   write.csv("Top10CrimeMap/data/df_crime_patterns_sample.csv", col.names = TRUE)

Number of Cases across Most Frequent 10 Offense Types in Each Year

# Stacked bar chart of yearly complaint counts, one fill colour per offense
# type. The summary is explicitly ungrouped (`.groups = "drop"`) so no
# residual grouping or regrouping message is carried into the plot, matching
# the hourly plot's summary.
p_1 <- df_crime_patterns |>
  group_by(ofns_desc, year) |>
  summarize(count = n(), .groups = "drop") |>
  ggplot(aes(x = year, y = count, fill = ofns_desc)) +
  geom_col(position = "stack") +
  scale_fill_viridis_d() +
  xlab("Year") +
  ylab("Number of Cases") +
  labs(fill = "Offense Type") +
  theme_minimal() +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p_1)

This data visualization presents a stacked bar chart showing the number of cases for different offense types in New York City over 6 years: 2017-2022. Each color in the bars corresponds to a specific type of offense, as indicated in the legend on the right.

The total number of offenses rises steadily from 2017 to 2019, followed by a noticeable decline in 2020. This dip could be attributed to the lockdowns, restrictions, and changes in social behavior during the pandemic. From 2020 to 2022, offenses increase again, which could indicate a return to pre-pandemic patterns or a response to evolving social and economic conditions. To learn more about general crime’s temporal change, composition, and their correlations with Borough and Law Category: click here

  • ‘Petit Larceny’ (represented in yellow) remains relatively stable as a significant portion of the offenses.
  • ‘Harassment 2’ (in cyan) seems to have increased slightly in 2020 compared to 2018 and 2022.
  • ‘Assault 3 & Related Offenses’ (in purple) shows an increase in 2020 before dropping in 2022.
  • ‘Grand Larceny’ (in green) also shows some fluctuations, with an increase in 2020 and a decrease in 2022.

(Data Completeness: The visualization does not include all offense types, focusing instead on the top 10, which allows for a clearer analysis of the most significant crime trends)

Number of Cases across Most Frequent 10 Offense Types by Hour in Each Year

# Line chart of complaint counts by hour of day, one line per offense type
# and one panel per year.
p_2 <- df_crime_patterns |>
  count(year, ofns_desc, hour, name = "count") |>
  ggplot(aes(x = hour, y = count, color = ofns_desc)) +
  geom_line() +
  facet_wrap(vars(year)) +
  labs(x = "Hour", y = "Count", color = "Offense Type") +
  theme_minimal()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p_2)

The visualization is a plot that shows the number of cases across the most frequent 10 offense types by hour for each year, with separate panels for each year from 2017 to 2022.

  • Daily Patterns: For offenses such as petit larceny, harassment 2, and grand larceny, there is a noticeable increase in cases during the hours of 11 AM to 6 PM. These peaks suggest that these types of offenses occur more frequently during the daytime, potentially aligning with higher population activity and opportunities for these crimes. Across all examined offenses, there is a marked decrease in frequency around the hours of 5 AM and 6 AM, indicating these early morning hours are when offenses are least common. This pattern could be related to lower pedestrian and vehicle traffic, reduced social interaction, and the fact that many potential victims and perpetrators are likely to be at home during these hours.

  • Yearly Differences: By comparing across panels, we can see the patterns hold steady over 2017-2022. This consistency suggests that the hourly trends in criminal activity for these offense types have not undergone significant changes over the years. Such stability in crime patterns might imply that underlying factors influencing crime rates—such as social behavior, economic conditions, and law enforcement practices—have remained relatively constant during this period.

From this visualization, stakeholders such as law enforcement or community leaders could draw insights into when to allocate resources more effectively to prevent or respond to these offenses. Additionally, it could prompt further investigation into why certain offenses peak at specific hours and how this information could inform prevention strategies.

Crime Map

  • Due to limitations in server memory, we are not able to display all offense reports on the heat map. As a result, we’ve randomly selected 30% of all NYPD crime complaints to be visualized on the heat map, ensuring that the performance remains efficient.
  • To offer a thorough analysis of crime trends, we have included all cases in the line graph that accompanies the heat map below.
  • Consequently, the discrepancy in the number of total crime cases each year between the two visual representations is deliberate. This method allows us to balance interactive map responsiveness with a full depiction of crime trends over the years.

This map highlights that Manhattan, Queens, and the Bronx are areas with more crime activities. This could be due to various factors like population density, socio-economic conditions, or policing strategies. An interesting observation is that the Upper East Side in Manhattan has significantly lower crime rates compared to its neighboring areas. This could be attributed to factors like higher income levels, better security measures, or lower population density.