Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
edx-platform-release
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Package Registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
code.vt.edu will be down for maintenance from 0530-0630 EDT Wednesday, March 26th
Show more breadcrumbs
Hsin-Yu Chien
edx-platform-release
Commits
b6993089
Commit
b6993089
authored
5 years ago
by
Hassan Javeed
Browse files
Options
Downloads
Patches
Plain Diff
Added management command to add history from snapshot.
parent
f20a064a
Branches
Branches containing commit
Tags
release-2020-08-26-16.45
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
openedx/core/djangoapps/content/course_overviews/management/commands/backfill_history.py
+171
-0
171 additions, 0 deletions
.../course_overviews/management/commands/backfill_history.py
with
171 additions
and
0 deletions
openedx/core/djangoapps/content/course_overviews/management/commands/backfill_history.py
0 → 100644
+
171
−
0
View file @
b6993089
# pylint: skip-file
"""
Management command to backfill history.
"""
import
csv
import
logging
import
os
import
time
from
io
import
open
from
django.core.management.base
import
BaseCommand
from
django.db
import
connection
,
transaction
log
=
logging
.
getLogger
(
__name__
)
class Command(BaseCommand):
    """
    Backfill history for models using django-simple-history.
    Example usage:
    $ ./manage.py lms backfill_history --batch_size 1000 --sleep_between 1 --input_root /tmp/data/ --settings=devstack
    """
    help = (
        "Populates the historical records with snapshot data."
    )

    # Default number of rows per INSERT batch (overridable via --batch_size).
    DEFAULT_SIZE = 1000
    # Default seconds to pause between batches (overridable via --sleep_between).
    DEFAULT_SLEEP_BETWEEN_INSERTS = 1
    # NOTE(review): DATE is never referenced in this file — the snapshot date
    # is derived from each input filename in handle() instead. Confirm it is
    # unused elsewhere before removing.
    DATE = '2019-06-29'
    # Written into history_user_id for every backfilled row (no acting user).
    HISTORY_USER_ID = None
    # Written into history_change_reason for every backfilled row.
    HISTORY_CHANGE_REASON = 'initial history population'
    # Tables to backfill. 'name' is the source table, 'exclude_column' an
    # optional column to omit from the copy, and 'input_filename' the snapshot
    # file whose trailing _<date> segment supplies the history_date value.
    TABLES = [
        {
            'name': 'course_modes_coursemode',
            'exclude_column': None,
            'input_filename': 'course_modes_coursemode_2019-06-29'
        },
    ]
def
add_arguments
(
self
,
parser
):
super
(
Command
,
self
).
add_arguments
(
parser
)
parser
.
add_argument
(
'
--sleep_between
'
,
default
=
self
.
DEFAULT_SLEEP_BETWEEN_INSERTS
,
type
=
float
,
help
=
'
Seconds to sleep between chunked inserts.
'
)
parser
.
add_argument
(
"
--batch_size
"
,
action
=
"
store
"
,
default
=
self
.
DEFAULT_SIZE
,
type
=
int
,
help
=
"
Maximum number of rows per insert.
"
,
)
parser
.
add_argument
(
"
--input_root
"
,
action
=
"
store
"
,
help
=
"
Path containing data files from snapshot for history backfill
"
)
def
chunks
(
self
,
ids
,
chunk_size
):
for
i
in
range
(
0
,
len
(
ids
),
chunk_size
):
yield
ids
[
i
:
i
+
chunk_size
]
def
replace_values
(
self
,
values
,
original
,
replacement
):
values
=
[[
replacement
if
v
==
original
else
v
for
v
in
value
]
for
value
in
values
]
return
values
def
handle
(
self
,
*
args
,
**
options
):
batch_size
=
options
[
'
batch_size
'
]
sleep_between
=
options
[
'
sleep_between
'
]
input_root
=
options
[
'
input_root
'
]
for
table_info
in
self
.
TABLES
:
table
=
table_info
[
'
name
'
]
historical_table
=
"
_historical
"
.
join
(
table
.
rsplit
(
'
_
'
,
1
))
exclude_column
=
table_info
[
'
exclude_column
'
]
input_filename
=
table_info
[
'
input_filename
'
]
file_path
=
os
.
path
.
join
(
input_root
,
input_filename
)
history_date
=
input_filename
.
rsplit
(
'
_
'
)[
-
1
]
with
connection
.
cursor
()
as
cursor
:
query
=
u
"""
SELECT
column_name
FROM information_schema.columns
WHERE table_name=
'
{}
'
ORDER BY ordinal_position
"""
.
format
(
table
)
cursor
.
execute
(
query
)
columns
=
[
column
[
0
]
for
column
in
cursor
.
fetchall
()]
if
exclude_column
in
columns
:
columns
.
remove
(
exclude_column
)
with
open
(
file_path
,
'
r
'
)
as
input_file
:
reader
=
csv
.
DictReader
(
input_file
,
delimiter
=
'
\x01
'
)
lines
=
list
(
reader
)
for
rows
in
self
.
chunks
(
lines
,
batch_size
):
row_ids
=
[
row
[
'
ID
'
]
for
row
in
rows
]
if
table
==
'
course_overviews_courseoverview
'
:
ids
=
'
,
'
.
join
(
"'
{}
'"
.
format
(
id
)
for
id
in
row_ids
)
else
:
ids
=
'
,
'
.
join
(
row_ids
)
# Checks for existing historical records
with
connection
.
cursor
()
as
cursor
:
query
=
u
"""
SELECT COUNT(1)
FROM {historical_table}
WHERE ID in ({ids})
AND history_type=
'
+
'
"""
.
format
(
historical_table
=
historical_table
,
ids
=
ids
)
# noqa
cursor
.
execute
(
query
)
count
=
cursor
.
fetchone
()[
0
]
if
count
==
len
(
rows
):
log
.
info
(
u
"
Initial history records already exist for ids %s..%s - skipping.
"
,
'
,
'
.
join
(
row_ids
[:
2
]),
'
,
'
.
join
(
row_ids
[
-
2
:])
)
continue
elif
count
!=
0
:
raise
Exception
(
u
"
Database count: %s does not match input count: %s
"
%
(
count
,
len
(
ids
)))
values
=
[[
row
[
column
.
upper
()]
for
column
in
columns
]
for
row
in
rows
]
# Replace 'NULL' with None
values
=
self
.
replace_values
(
values
,
'
NULL
'
,
None
)
# Replace 'true' with True
values
=
self
.
replace_values
(
values
,
'
true
'
,
True
)
# Replace 'false' with False
values
=
self
.
replace_values
(
values
,
'
false
'
,
False
)
# Add history columns data
for
value
in
values
:
value
.
extend
([
history_date
,
self
.
HISTORY_CHANGE_REASON
,
'
+
'
,
self
.
HISTORY_USER_ID
])
# Convert to tuple
values
=
[
tuple
(
value
)
for
value
in
values
]
quoted_columns
=
[
'
`{}`
'
.
format
(
c
)
for
c
in
columns
]
with
transaction
.
atomic
():
with
connection
.
cursor
()
as
cursor
:
log
.
info
(
u
"
Inserting historical records for %s starting with id %s to %s
"
,
table
,
row_ids
[
0
],
row_ids
[
-
1
]
)
query
=
u
"""
INSERT INTO {historical_table}(
{insert_columns},`history_date`,`history_change_reason`,`history_type`,`history_user_id`
)
VALUES ({placeholder})
"""
.
format
(
historical_table
=
historical_table
,
insert_columns
=
'
,
'
.
join
(
quoted_columns
),
placeholder
=
'
,
'
.
join
([
'
%s
'
]
*
(
len
(
columns
)
+
4
))
)
# noqa
cursor
.
executemany
(
query
,
values
)
log
.
info
(
u
"
Sleeping %s seconds...
"
,
sleep_between
)
time
.
sleep
(
sleep_between
)
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment